Project import generated by Copybara.

GitOrigin-RevId: 1610e588e497817fae2d9a458093ab6a370e2972
parent: b899d17f18
commit: 710fb3de58

WORKSPACE (25 changes)
|
@ -331,7 +331,9 @@ load("@rules_jvm_external//:defs.bzl", "maven_install")
|
|||
maven_install(
|
||||
artifacts = [
|
||||
"androidx.concurrent:concurrent-futures:1.0.0-alpha03",
|
||||
"androidx.lifecycle:lifecycle-common:2.2.0",
|
||||
"androidx.lifecycle:lifecycle-common:2.3.1",
|
||||
"androidx.activity:activity:1.2.2",
|
||||
"androidx.fragment:fragment:1.3.4",
|
||||
"androidx.annotation:annotation:aar:1.1.0",
|
||||
"androidx.appcompat:appcompat:aar:1.1.0-rc01",
|
||||
"androidx.camera:camera-core:1.0.0-beta10",
|
||||
|
@ -376,9 +378,9 @@ http_archive(
|
|||
)
|
||||
|
||||
# Tensorflow repo should always go after the other external dependencies.
|
||||
# 2021-06-07
|
||||
_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec"
|
||||
_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126"
|
||||
# 2021-07-29
|
||||
_TENSORFLOW_GIT_COMMIT = "52a2905cbc21034766c08041933053178c5d10e3"
|
||||
_TENSORFLOW_SHA256 = "06d4691bcdb700f3275fa0971a1585221c2b9f3dffe867963be565a6643d7f56"
|
||||
http_archive(
|
||||
name = "org_tensorflow",
|
||||
urls = [
|
||||
|
@ -399,3 +401,18 @@ load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
|
|||
tf_workspace3()
|
||||
load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
|
||||
tf_workspace2()
|
||||
|
||||
# Edge TPU
|
||||
http_archive(
|
||||
name = "libedgetpu",
|
||||
sha256 = "14d5527a943a25bc648c28a9961f954f70ba4d79c0a9ca5ae226e1831d72fe80",
|
||||
strip_prefix = "libedgetpu-3164995622300286ef2bb14d7fdc2792dae045b7",
|
||||
urls = [
|
||||
"https://github.com/google-coral/libedgetpu/archive/3164995622300286ef2bb14d7fdc2792dae045b7.tar.gz"
|
||||
],
|
||||
)
|
||||
load("@libedgetpu//:workspace.bzl", "libedgetpu_dependencies")
|
||||
libedgetpu_dependencies()
|
||||
|
||||
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
|
||||
cc_crosstool(name = "crosstool")
|
||||
|
|
|
@ -16,12 +16,14 @@ nav_order: 1
|
|||
|
||||
Please follow the instructions below to build Android example apps in the supported
|
||||
MediaPipe [solutions](../solutions/solutions.md). To learn more about these
|
||||
example apps, start from [Hello World! on Android](./hello_world_android.md). To
|
||||
incorporate MediaPipe into an existing Android Studio project, see these
|
||||
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
|
||||
Gradle.
|
||||
example apps, start from [Hello World! on Android](./hello_world_android.md).
|
||||
|
||||
## Building Android example apps
|
||||
To incorporate MediaPipe into Android Studio projects, see these
|
||||
[instructions](./android_solutions.md) to use the MediaPipe Android Solution
|
||||
APIs (currently in alpha) that are now available in
|
||||
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
|
||||
|
||||
## Building Android example apps with Bazel
|
||||
|
||||
### Prerequisite
|
||||
|
||||
|
@ -51,16 +53,6 @@ $YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
|
|||
android_sdk_repository() in the
|
||||
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.
|
||||
|
||||
Please verify all the necessary packages are installed.
|
||||
|
||||
* Android SDK Platform API Level 28 or 29
|
||||
* Android SDK Build-Tools 28 or 29
|
||||
* Android SDK Platform-Tools 28 or 29
|
||||
* Android SDK Tools 26.1.1
|
||||
* Android NDK 19c or above
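
If any of these are missing, one way to install them from the command line is
with the Android SDK's `sdkmanager` tool; the package versions below are
placeholders, so substitute ones that satisfy the list above:

```bash
sdkmanager "platform-tools" "platforms;android-29" \
    "build-tools;29.0.3" "ndk;21.1.6352462"
```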
|
||||
|
||||
### Option 1: Build with Bazel in Command Line
|
||||
|
||||
Tip: You can run this
|
||||
[script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
|
||||
to build (and install) all MediaPipe Android example apps.
|
||||
|
@ -84,108 +76,3 @@ to build (and install) all MediaPipe Android example apps.
|
|||
```bash
|
||||
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
|
||||
```
|
||||
|
||||
### Option 2: Build with Bazel in Android Studio
|
||||
|
||||
The MediaPipe project can be imported into Android Studio using the Bazel
|
||||
plugins. This allows the MediaPipe examples to be built and modified in Android
|
||||
Studio.
|
||||
|
||||
To incorporate MediaPipe into an existing Android Studio project, see these
|
||||
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
|
||||
Gradle.
|
||||
|
||||
The steps below use Android Studio 3.5 to build and install a MediaPipe example
|
||||
app:
|
||||
|
||||
1. Install and launch Android Studio 3.5.
|
||||
|
||||
2. Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
|
||||
|
||||
* Verify that Android SDK Platform API Level 28 or 29 is installed.
|
||||
* Take note of the Android SDK Location, e.g.,
|
||||
`/usr/local/home/Android/Sdk`.
|
||||
|
||||
3. Select `Configure` -> `SDK Manager` -> `SDK Tools`.
|
||||
|
||||
* Verify that Android SDK Build-Tools 28 or 29 is installed.
|
||||
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
|
||||
* Verify that Android SDK Tools 26.1.1 is installed.
|
||||
* Verify that Android NDK 19c or above is installed.
|
||||
* Take note of the Android NDK Location, e.g.,
|
||||
`/usr/local/home/Android/Sdk/ndk-bundle` or
|
||||
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
|
||||
|
||||
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
|
||||
to the installed SDK and NDK.
|
||||
|
||||
```bash
|
||||
export ANDROID_HOME=/usr/local/home/Android/Sdk
|
||||
|
||||
# If the NDK libraries are installed by a previous version of Android Studio, do
|
||||
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
|
||||
# If the NDK libraries are installed by Android Studio 3.5, do
|
||||
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
|
||||
```
|
||||
|
||||
5. Select `Configure` -> `Plugins` to install `Bazel`.
|
||||
|
||||
6. On Linux, select `File` -> `Settings` -> `Bazel settings`. On macOS, select
|
||||
`Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
|
||||
binary location` to be the same as the output of `$ which bazel`.
|
||||
|
||||
7. Select `Import Bazel Project`.
|
||||
|
||||
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
|
||||
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
|
||||
`Next`.
|
||||
* Modify `Project View` to be the following and select `Finish`.
|
||||
|
||||
```
|
||||
directories:
|
||||
# read project settings, e.g., .bazelrc
|
||||
.
|
||||
-mediapipe/objc
|
||||
-mediapipe/examples/ios
|
||||
|
||||
targets:
|
||||
//mediapipe/examples/android/...:all
|
||||
//mediapipe/java/...:all
|
||||
|
||||
android_sdk_platform: android-29
|
||||
|
||||
sync_flags:
|
||||
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
|
||||
```
|
||||
|
||||
8. Select `Bazel` -> `Sync` -> `Sync project with Build files`.
|
||||
|
||||
Note: Even after doing step 4, if you still see the error: `"no such package
|
||||
'@androidsdk//': Either the path attribute of android_sdk_repository or the
|
||||
ANDROID_HOME environment variable must be set."`, please modify the
|
||||
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
|
||||
file to point to your SDK and NDK library locations, as below:
|
||||
|
||||
```
|
||||
android_sdk_repository(
|
||||
name = "androidsdk",
|
||||
path = "/path/to/android/sdk"
|
||||
)
|
||||
|
||||
android_ndk_repository(
|
||||
name = "androidndk",
|
||||
path = "/path/to/android/ndk"
|
||||
)
|
||||
```
|
||||
|
||||
9. Connect an Android device to the workstation.
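
To confirm the device is visible to the build tooling before running, you can
list connected devices with `adb` (part of the Platform-Tools installed
earlier):

```bash
adb devices
# The device should appear with the state "device" (not "unauthorized" or "offline").
```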
|
||||
|
||||
10. Select `Run...` -> `Edit Configurations...`.
|
||||
|
||||
* Select `Templates` -> `Bazel Command`.
|
||||
* Enter Target Expression:
|
||||
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
|
||||
* Enter Bazel command: `mobile-install`.
|
||||
* Enter Bazel flags: `-c opt --config=android_arm64`.
|
||||
* Press the `[+]` button to add the new configuration.
|
||||
* Select `Run` to run the example app on the connected Android device.
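
For reference, this run configuration corresponds roughly to the following
command-line invocation, built from the same target, Bazel command, and flags
entered above:

```bash
bazel mobile-install -c opt --config=android_arm64 \
    //mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu
```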
|
||||
|
|
|
@ -3,7 +3,7 @@ layout: default
|
|||
title: MediaPipe Android Archive
|
||||
parent: MediaPipe on Android
|
||||
grand_parent: Getting Started
|
||||
nav_order: 2
|
||||
nav_order: 3
|
||||
---
|
||||
|
||||
# MediaPipe Android Archive
|
||||
|
|
docs/getting_started/android_solutions.md (new file, 79 lines)
|
@ -0,0 +1,79 @@
|
|||
---
|
||||
layout: default
|
||||
title: Android Solutions
|
||||
parent: MediaPipe on Android
|
||||
grand_parent: Getting Started
|
||||
nav_order: 2
|
||||
---
|
||||
|
||||
# Android Solution APIs
|
||||
{: .no_toc }
|
||||
|
||||
1. TOC
|
||||
{:toc}
|
||||
---
|
||||
|
||||
Please follow the instructions below to use the MediaPipe Solution APIs in Android
|
||||
Studio projects and build the Android example apps in the supported MediaPipe
|
||||
[solutions](../solutions/solutions.md).
|
||||
|
||||
## Integrate MediaPipe Android Solutions in Android Studio
|
||||
|
||||
MediaPipe Android Solution APIs (currently in alpha) are now available in
|
||||
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
|
||||
To incorporate MediaPipe Android Solutions into an Android Studio project, add
|
||||
the following into the project's Gradle dependencies:
|
||||
|
||||
```
|
||||
dependencies {
|
||||
// MediaPipe solution-core is the foundation of any MediaPipe solutions.
|
||||
implementation 'com.google.mediapipe:solution-core:latest.release'
|
||||
// Optional: MediaPipe Hands solution.
|
||||
implementation 'com.google.mediapipe:hands:latest.release'
|
||||
// Optional: MediaPipe FaceMesh solution.
|
||||
implementation 'com.google.mediapipe:facemesh:latest.release'
|
||||
// MediaPipe deps
|
||||
implementation 'com.google.flogger:flogger:latest.release'
|
||||
implementation 'com.google.flogger:flogger-system-backend:latest.release'
|
||||
implementation 'com.google.guava:guava:27.0.1-android'
|
||||
implementation 'com.google.protobuf:protobuf-java:3.11.4'
|
||||
// CameraX core library
|
||||
def camerax_version = "1.0.0-beta10"
|
||||
implementation "androidx.camera:camera-core:$camerax_version"
|
||||
implementation "androidx.camera:camera-camera2:$camerax_version"
|
||||
implementation "androidx.camera:camera-lifecycle:$camerax_version"
|
||||
}
|
||||
```
|
||||
|
||||
See the detailed solutions API usage examples for different use cases in the
|
||||
solution example apps'
|
||||
[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions).
|
||||
If the prebuilt Maven packages are not sufficient, build the MediaPipe
Android archive library locally by following these
[instructions](./android_archive_library.md).
|
||||
|
||||
## Build solution example apps in Android Studio
|
||||
|
||||
1. Open Android Studio Arctic Fox on Linux, macOS, or Windows.
|
||||
|
||||
2. Import the mediapipe/examples/android/solutions directory into Android Studio.
|
||||
|
||||
![Screenshot](../images/import_mp_android_studio_project.png)
|
||||
|
||||
3. For Windows users, run `create_win_symlinks.bat` as administrator to create
|
||||
res directory symlinks.
|
||||
|
||||
![Screenshot](../images/run_create_win_symlinks.png)
|
||||
|
||||
4. Select "File" -> "Sync Project with Gradle Files" to sync project.
|
||||
|
||||
5. Run solution example app in Android Studio.
|
||||
|
||||
![Screenshot](../images/run_android_solution_app.png)
|
||||
|
||||
6. (Optional) Run solutions on CPU.
|
||||
|
||||
MediaPipe solution example apps run the pipeline and the model inference on
|
||||
GPU by default. If needed, for example to run the apps on Android Emulator,
|
||||
set the `RUN_ON_GPU` boolean variable to `false` in the app's
|
||||
MainActivity.java to run the pipeline and the model inference on CPU.
|
|
@ -43,104 +43,189 @@ install --user six`.
|
|||
|
||||
3. Install OpenCV and FFmpeg.
|
||||
|
||||
Option 1. Use package manager tool to install the pre-compiled OpenCV
|
||||
libraries. FFmpeg will be installed via libopencv-video-dev.
|
||||
**Option 1**. Use a package manager to install the pre-compiled OpenCV
libraries. FFmpeg will be installed via `libopencv-video-dev`.
|
||||
|
||||
Note: Debian 9 and Ubuntu 16.04 provide OpenCV 2.4.9. You may want to take
|
||||
option 2 or 3 to install OpenCV 3 or above.
|
||||
OS | OpenCV
|
||||
-------------------- | ------
|
||||
Debian 9 (stretch) | 2.4
|
||||
Debian 10 (buster) | 3.2
|
||||
Debian 11 (bullseye) | 4.5
|
||||
Ubuntu 16.04 LTS | 2.4
|
||||
Ubuntu 18.04 LTS | 3.2
|
||||
Ubuntu 20.04 LTS | 4.2
|
||||
Ubuntu 21.04 | 4.5
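
To double-check which OpenCV version your distribution actually ships, you can
query the candidate version of the package before installing (this assumes an
apt-based system, matching the command below):

```bash
apt-cache policy libopencv-core-dev
# The "Candidate:" line shows the OpenCV version that would be installed.
```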
|
||||
|
||||
```bash
|
||||
$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
|
||||
libopencv-calib3d-dev libopencv-features2d-dev \
|
||||
libopencv-imgproc-dev libopencv-video-dev
|
||||
$ sudo apt-get install -y \
|
||||
libopencv-core-dev \
|
||||
libopencv-highgui-dev \
|
||||
libopencv-calib3d-dev \
|
||||
libopencv-features2d-dev \
|
||||
libopencv-imgproc-dev \
|
||||
libopencv-video-dev
|
||||
```
|
||||
|
||||
Debian 9 and Ubuntu 18.04 install the packages in
|
||||
`/usr/lib/x86_64-linux-gnu`. MediaPipe's [`opencv_linux.BUILD`] and
|
||||
[`ffmpeg_linux.BUILD`] are configured for this library path. Ubuntu 20.04
|
||||
may install the OpenCV and FFmpeg packages in `/usr/local`, Please follow
|
||||
the option 3 below to modify the [`WORKSPACE`], [`opencv_linux.BUILD`] and
|
||||
[`ffmpeg_linux.BUILD`] files accordingly.
|
||||
|
||||
Moreover, for Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, the
|
||||
library path needs to be modified like the following:
|
||||
MediaPipe's [`opencv_linux.BUILD`] and [`WORKSPACE`] are already configured
|
||||
for OpenCV 2/3 and should work correctly on any architecture:
|
||||
|
||||
```bash
|
||||
sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD
|
||||
# WORKSPACE
|
||||
new_local_repository(
|
||||
name = "linux_opencv",
|
||||
build_file = "@//third_party:opencv_linux.BUILD",
|
||||
path = "/usr",
|
||||
)
|
||||
|
||||
# opencv_linux.BUILD for OpenCV 2/3 installed from Debian package
|
||||
cc_library(
|
||||
name = "opencv",
|
||||
linkopts = [
|
||||
"-l:libopencv_core.so",
|
||||
"-l:libopencv_calib3d.so",
|
||||
"-l:libopencv_features2d.so",
|
||||
"-l:libopencv_highgui.so",
|
||||
"-l:libopencv_imgcodecs.so",
|
||||
"-l:libopencv_imgproc.so",
|
||||
"-l:libopencv_video.so",
|
||||
"-l:libopencv_videoio.so",
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
Option 2. Run [`setup_opencv.sh`] to automatically build OpenCV from source
|
||||
and modify MediaPipe's OpenCV config.
|
||||
For OpenCV 4, you need to modify [`opencv_linux.BUILD`] to take the current
architecture into account:
|
||||
|
||||
Option 3. Follow OpenCV's
|
||||
```bash
|
||||
# WORKSPACE
|
||||
new_local_repository(
|
||||
name = "linux_opencv",
|
||||
build_file = "@//third_party:opencv_linux.BUILD",
|
||||
path = "/usr",
|
||||
)
|
||||
|
||||
# opencv_linux.BUILD for OpenCV 4 installed from Debian package
|
||||
cc_library(
|
||||
name = "opencv",
|
||||
hdrs = glob([
|
||||
# Uncomment according to your multiarch value (gcc -print-multiarch):
|
||||
# "include/aarch64-linux-gnu/opencv4/opencv2/cvconfig.h",
|
||||
# "include/arm-linux-gnueabihf/opencv4/opencv2/cvconfig.h",
|
||||
# "include/x86_64-linux-gnu/opencv4/opencv2/cvconfig.h",
|
||||
"include/opencv4/opencv2/**/*.h*",
|
||||
]),
|
||||
includes = [
|
||||
# Uncomment according to your multiarch value (gcc -print-multiarch):
|
||||
# "include/aarch64-linux-gnu/opencv4/",
|
||||
# "include/arm-linux-gnueabihf/opencv4/",
|
||||
# "include/x86_64-linux-gnu/opencv4/",
|
||||
"include/opencv4/",
|
||||
],
|
||||
linkopts = [
|
||||
"-l:libopencv_core.so",
|
||||
"-l:libopencv_calib3d.so",
|
||||
"-l:libopencv_features2d.so",
|
||||
"-l:libopencv_highgui.so",
|
||||
"-l:libopencv_imgcodecs.so",
|
||||
"-l:libopencv_imgproc.so",
|
||||
"-l:libopencv_video.so",
|
||||
"-l:libopencv_videoio.so",
|
||||
],
|
||||
)
|
||||
```
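
To find out which of the commented-out include paths applies to your machine,
print the multiarch triplet referenced in the comments above:

```bash
gcc -print-multiarch
# Typical outputs: x86_64-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabihf
```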
|
||||
|
||||
**Option 2**. Run [`setup_opencv.sh`] to automatically build OpenCV from
|
||||
source and modify MediaPipe's OpenCV config. This option will do all steps
|
||||
defined in Option 3 automatically.
|
||||
|
||||
**Option 3**. Follow OpenCV's
|
||||
[documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html)
|
||||
to manually build OpenCV from source code.
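
For reference, a minimal out-of-source build sketch that installs under
`/usr/local` is shown below; the version tag and cmake options are
placeholders, so consult the linked OpenCV documentation for the flags you
actually need:

```bash
git clone --branch 3.4.6 https://github.com/opencv/opencv.git
mkdir -p opencv/build && cd opencv/build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j"$(nproc)"
sudo make install  # installs under /usr/local by default
```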
|
||||
|
||||
Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and
|
||||
[`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg
|
||||
libraries. For example if OpenCV and FFmpeg are both manually installed in
|
||||
"/usr/local/", you will need to update: (1) the "linux_opencv" and
|
||||
"linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv"
|
||||
cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg"
|
||||
cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below:
|
||||
You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point
MediaPipe to your own OpenCV libraries. The examples below assume OpenCV is
installed to `/usr/local/`, which is the default install prefix when building
from source.
|
||||
|
||||
OpenCV 2/3 setup:
|
||||
|
||||
```bash
|
||||
# WORKSPACE
|
||||
new_local_repository(
|
||||
name = "linux_opencv",
|
||||
build_file = "@//third_party:opencv_linux.BUILD",
|
||||
path = "/usr/local",
|
||||
name = "linux_opencv",
|
||||
build_file = "@//third_party:opencv_linux.BUILD",
|
||||
path = "/usr/local",
|
||||
)
|
||||
|
||||
# opencv_linux.BUILD for OpenCV 2/3 installed to /usr/local
|
||||
cc_library(
|
||||
name = "opencv",
|
||||
linkopts = [
|
||||
"-L/usr/local/lib",
|
||||
"-l:libopencv_core.so",
|
||||
"-l:libopencv_calib3d.so",
|
||||
"-l:libopencv_features2d.so",
|
||||
"-l:libopencv_highgui.so",
|
||||
"-l:libopencv_imgcodecs.so",
|
||||
"-l:libopencv_imgproc.so",
|
||||
"-l:libopencv_video.so",
|
||||
"-l:libopencv_videoio.so",
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
OpenCV 4 setup:
|
||||
|
||||
```bash
|
||||
# WORKSPACE
|
||||
new_local_repository(
|
||||
name = "linux_ffmpeg",
|
||||
build_file = "@//third_party:ffmpeg_linux.BUILD",
|
||||
path = "/usr/local",
|
||||
name = "linux_opencv",
|
||||
build_file = "@//third_party:opencv_linux.BUILD",
|
||||
path = "/usr/local",
|
||||
)
|
||||
|
||||
# opencv_linux.BUILD for OpenCV 4 installed to /usr/local
|
||||
cc_library(
|
||||
name = "opencv",
|
||||
srcs = glob(
|
||||
[
|
||||
"lib/libopencv_core.so",
|
||||
"lib/libopencv_highgui.so",
|
||||
"lib/libopencv_imgcodecs.so",
|
||||
"lib/libopencv_imgproc.so",
|
||||
"lib/libopencv_video.so",
|
||||
"lib/libopencv_videoio.so",
|
||||
],
|
||||
),
|
||||
hdrs = glob([
|
||||
# For OpenCV 3.x
|
||||
"include/opencv2/**/*.h*",
|
||||
# For OpenCV 4.x
|
||||
# "include/opencv4/opencv2/**/*.h*",
|
||||
]),
|
||||
includes = [
|
||||
# For OpenCV 3.x
|
||||
"include/",
|
||||
# For OpenCV 4.x
|
||||
# "include/opencv4/",
|
||||
],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
name = "opencv",
|
||||
hdrs = glob([
|
||||
"include/opencv4/opencv2/**/*.h*",
|
||||
]),
|
||||
includes = [
|
||||
"include/opencv4/",
|
||||
],
|
||||
linkopts = [
|
||||
"-L/usr/local/lib",
|
||||
"-l:libopencv_core.so",
|
||||
"-l:libopencv_calib3d.so",
|
||||
"-l:libopencv_features2d.so",
|
||||
"-l:libopencv_highgui.so",
|
||||
"-l:libopencv_imgcodecs.so",
|
||||
"-l:libopencv_imgproc.so",
|
||||
"-l:libopencv_video.so",
|
||||
"-l:libopencv_videoio.so",
|
||||
],
|
||||
)
|
||||
```
|
||||
|
||||
Current FFmpeg setup is defined in [`ffmpeg_linux.BUILD`] and should work
|
||||
for any architecture:
|
||||
|
||||
```bash
|
||||
# WORKSPACE
|
||||
new_local_repository(
|
||||
name = "linux_ffmpeg",
|
||||
build_file = "@//third_party:ffmpeg_linux.BUILD",
|
||||
path = "/usr"
|
||||
)
|
||||
|
||||
# ffmpeg_linux.BUILD for FFmpeg installed from Debian package
|
||||
cc_library(
|
||||
name = "libffmpeg",
|
||||
srcs = glob(
|
||||
[
|
||||
"lib/libav*.so",
|
||||
],
|
||||
),
|
||||
hdrs = glob(["include/libav*/*.h"]),
|
||||
includes = ["include"],
|
||||
linkopts = [
|
||||
"-lavcodec",
|
||||
"-lavformat",
|
||||
"-lavutil",
|
||||
],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
name = "libffmpeg",
|
||||
linkopts = [
|
||||
"-l:libavcodec.so",
|
||||
"-l:libavformat.so",
|
||||
"-l:libavutil.so",
|
||||
],
|
||||
)
|
||||
```
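
A quick way to confirm that the FFmpeg development libraries referenced above
are visible on the system (assuming `pkg-config` and the libav* dev packages
are installed):

```bash
for lib in libavcodec libavformat libavutil; do
  pkg-config --modversion "$lib"
done
```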
|
||||
|
||||
|
|
|
@ -29,6 +29,16 @@ Solution | NPM Package | Example
|
|||
Click on a solution link above for more information, including API and code
|
||||
snippets.
|
||||
|
||||
### Supported platforms:
|
||||
|
||||
| Browser | Platform                | Notes                                               |
| ------- | ----------------------- | --------------------------------------------------- |
| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia unsupported. |
| Chrome  | iOS                     | Camera unavailable in Chrome on iOS.                |
| Safari  | iPad/iPhone/Mac         | iOS and Safari on iPad / iPhone / MacBook           |
|
||||
|
||||
The quickest way to get acclimated is to look at the examples above. Each demo
|
||||
has a link to a [CodePen][codepen] so that you can edit the code and try it
|
||||
yourself. We have included a number of utility packages to help you get started:
|
||||
|
|
docs/images/import_mp_android_studio_project.png (new binary file, 128 KiB)
docs/images/mobile/pose_segmentation.mp4 (new binary file)
docs/images/run_android_solution_app.png (new binary file, 258 KiB)
docs/images/run_create_win_symlinks.png (new binary file, 51 KiB)
|
@ -278,6 +278,7 @@ Supported configuration options:
|
|||
import cv2
|
||||
import mediapipe as mp
|
||||
mp_drawing = mp.solutions.drawing_utils
|
||||
mp_drawing_styles = mp.solutions.drawing_styles
|
||||
mp_face_mesh = mp.solutions.face_mesh
|
||||
|
||||
# For static images:
|
||||
|
@ -301,9 +302,17 @@ with mp_face_mesh.FaceMesh(
|
|||
mp_drawing.draw_landmarks(
|
||||
image=annotated_image,
|
||||
landmark_list=face_landmarks,
|
||||
connections=mp_face_mesh.FACE_CONNECTIONS,
|
||||
landmark_drawing_spec=drawing_spec,
|
||||
connection_drawing_spec=drawing_spec)
|
||||
connections=mp_face_mesh.FACEMESH_TESSELATION,
|
||||
landmark_drawing_spec=None,
|
||||
connection_drawing_spec=mp_drawing_styles
|
||||
.get_default_face_mesh_tesselation_style())
|
||||
mp_drawing.draw_landmarks(
|
||||
image=annotated_image,
|
||||
landmark_list=face_landmarks,
|
||||
connections=mp_face_mesh.FACEMESH_CONTOURS,
|
||||
landmark_drawing_spec=None,
|
||||
connection_drawing_spec=mp_drawing_styles
|
||||
.get_default_face_mesh_contours_style())
|
||||
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
|
||||
|
||||
# For webcam input:
|
||||
|
@ -335,9 +344,17 @@ with mp_face_mesh.FaceMesh(
|
|||
mp_drawing.draw_landmarks(
|
||||
image=image,
|
||||
landmark_list=face_landmarks,
|
||||
connections=mp_face_mesh.FACE_CONNECTIONS,
|
||||
landmark_drawing_spec=drawing_spec,
|
||||
connection_drawing_spec=drawing_spec)
|
||||
connections=mp_face_mesh.FACEMESH_TESSELATION,
|
||||
landmark_drawing_spec=None,
|
||||
connection_drawing_spec=mp_drawing_styles
|
||||
.get_default_face_mesh_tesselation_style())
|
||||
mp_drawing.draw_landmarks(
|
||||
image=image,
|
||||
landmark_list=face_landmarks,
|
||||
connections=mp_face_mesh.FACEMESH_CONTOURS,
|
||||
landmark_drawing_spec=None,
|
||||
connection_drawing_spec=mp_drawing_styles
|
||||
.get_default_face_mesh_contours_style())
|
||||
cv2.imshow('MediaPipe FaceMesh', image)
|
||||
if cv2.waitKey(5) & 0xFF == 27:
|
||||
break
|
||||
|
@ -423,6 +440,200 @@ camera.start();
|
|||
</script>
|
||||
```
|
||||
|
||||
### Android Solution API
|
||||
|
||||
Please first follow general
|
||||
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
|
||||
to add MediaPipe Gradle dependencies, then try the FaceMesh solution API in the
|
||||
companion
|
||||
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh)
|
||||
following
|
||||
[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
|
||||
and learn more in the usage example below.
|
||||
|
||||
Supported configuration options:
|
||||
|
||||
* [staticImageMode](#static_image_mode)
|
||||
* [maxNumFaces](#max_num_faces)
|
||||
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
|
||||
|
||||
#### Camera Input
|
||||
|
||||
```java
|
||||
// For camera input and result rendering with OpenGL.
|
||||
FaceMeshOptions faceMeshOptions =
|
||||
FaceMeshOptions.builder()
|
||||
.setMode(FaceMeshOptions.STREAMING_MODE) // API soon to become
|
||||
.setMaxNumFaces(1) // setStaticImageMode(false)
|
||||
.setRunOnGpu(true).build();
|
||||
FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
|
||||
facemesh.setErrorListener(
|
||||
(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
|
||||
|
||||
// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
|
||||
CameraInput cameraInput = new CameraInput(this);
|
||||
cameraInput.setNewFrameListener(
|
||||
textureFrame -> facemesh.send(textureFrame));
|
||||
|
||||
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
|
||||
// that provides the interfaces to run user-defined OpenGL rendering code.
|
||||
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
|
||||
// as an example.
|
||||
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
|
||||
new SolutionGlSurfaceView<>(
|
||||
this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
|
||||
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
|
||||
glSurfaceView.setRenderInputImage(true);
|
||||
|
||||
facemesh.setResultListener(
|
||||
faceMeshResult -> {
|
||||
NormalizedLandmark noseLandmark =
|
||||
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
noseLandmark.getX(), noseLandmark.getY()));
|
||||
// Request GL rendering.
|
||||
glSurfaceView.setRenderData(faceMeshResult);
|
||||
glSurfaceView.requestRender();
|
||||
});
|
||||
|
||||
// The runnable to start camera after the GLSurfaceView is attached.
|
||||
glSurfaceView.post(
|
||||
() ->
|
||||
cameraInput.start(
|
||||
this,
|
||||
facemesh.getGlContext(),
|
||||
CameraInput.CameraFacing.FRONT,
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight()));
|
||||
```
|
||||
|
||||
#### Image Input
|
||||
|
||||
```java
|
||||
// For reading images from gallery and drawing the output in an ImageView.
|
||||
FaceMeshOptions faceMeshOptions =
|
||||
FaceMeshOptions.builder()
|
||||
.setMode(FaceMeshOptions.STATIC_IMAGE_MODE) // API soon to become
|
||||
.setMaxNumFaces(1) // setStaticImageMode(true)
|
||||
.setRunOnGpu(true).build();
|
||||
FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
|
||||
|
||||
// Connects MediaPipe FaceMesh to the user-defined ImageView instance that allows
|
||||
// users to have the custom drawing of the output landmarks on it.
|
||||
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
|
||||
// as an example.
|
||||
FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
|
||||
facemesh.setResultListener(
|
||||
faceMeshResult -> {
|
||||
int width = faceMeshResult.inputBitmap().getWidth();
|
||||
int height = faceMeshResult.inputBitmap().getHeight();
|
||||
NormalizedLandmark noseLandmark =
|
||||
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
|
||||
noseLandmark.getX() * width, noseLandmark.getY() * height));
|
||||
// Request canvas drawing.
|
||||
imageView.setFaceMeshResult(faceMeshResult);
|
||||
runOnUiThread(() -> imageView.update());
|
||||
});
|
||||
facemesh.setErrorListener(
|
||||
(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
|
||||
|
||||
// ActivityResultLauncher to get an image from the gallery as Bitmap.
|
||||
ActivityResultLauncher<Intent> imageGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
|
||||
Bitmap bitmap = null;
|
||||
try {
|
||||
bitmap =
|
||||
MediaStore.Images.Media.getBitmap(
|
||||
this.getContentResolver(), resultIntent.getData());
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Bitmap reading error:" + e);
|
||||
}
|
||||
if (bitmap != null) {
|
||||
facemesh.send(bitmap);
|
||||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
||||
```java
|
||||
// For video input and result rendering with OpenGL.
|
||||
FaceMeshOptions faceMeshOptions =
|
||||
FaceMeshOptions.builder()
|
||||
.setMode(FaceMeshOptions.STREAMING_MODE) // API soon to become
|
||||
.setMaxNumFaces(1) // setStaticImageMode(false)
|
||||
.setRunOnGpu(true).build();
|
||||
FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
|
||||
facemesh.setErrorListener(
|
||||
(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
|
||||
|
||||
// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
|
||||
VideoInput videoInput = new VideoInput(this);
|
||||
videoInput.setNewFrameListener(
|
||||
textureFrame -> facemesh.send(textureFrame));
|
||||
|
||||
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
|
||||
// that provides the interfaces to run user-defined OpenGL rendering code.
|
||||
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
|
||||
// as an example.
|
||||
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
|
||||
new SolutionGlSurfaceView<>(
|
||||
this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
|
||||
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
|
||||
glSurfaceView.setRenderInputImage(true);
|
||||
|
||||
facemesh.setResultListener(
|
||||
faceMeshResult -> {
|
||||
NormalizedLandmark noseLandmark =
|
||||
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
noseLandmark.getX(), noseLandmark.getY()));
|
||||
// Request GL rendering.
|
||||
glSurfaceView.setRenderData(faceMeshResult);
|
||||
glSurfaceView.requestRender();
|
||||
});
|
||||
|
||||
ActivityResultLauncher<Intent> videoGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null) {
|
||||
if (result.getResultCode() == RESULT_OK) {
|
||||
glSurfaceView.post(
|
||||
() ->
|
||||
videoInput.start(
|
||||
this,
|
||||
resultIntent.getData(),
|
||||
facemesh.getGlContext(),
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight()));
|
||||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
||||
Please first see general instructions for
|
||||
|
|
|
@ -219,8 +219,8 @@ Supported configuration options:
|
|||
import cv2
|
||||
import mediapipe as mp
|
||||
mp_drawing = mp.solutions.drawing_utils
|
||||
mp_drawing_styles = mp.solutions.drawing_styles
|
||||
mp_hands = mp.solutions.hands
|
||||
drawing_styles = mp.solutions.drawing_styles
|
||||
|
||||
# For static images:
|
||||
IMAGE_FILES = []
|
||||
|
@ -249,9 +249,11 @@ with mp_hands.Hands(
|
|||
f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
|
||||
)
|
||||
mp_drawing.draw_landmarks(
|
||||
annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
|
||||
drawing_styles.get_default_hand_landmark_style(),
|
||||
drawing_styles.get_default_hand_connection_style())
|
||||
annotated_image,
|
||||
hand_landmarks,
|
||||
mp_hands.HAND_CONNECTIONS,
|
||||
mp_drawing_styles.get_default_hand_landmarks_style(),
|
||||
mp_drawing_styles.get_default_hand_connections_style())
|
||||
cv2.imwrite(
|
||||
'/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
|
||||
|
||||
|
@ -281,9 +283,11 @@ with mp_hands.Hands(
|
|||
if results.multi_hand_landmarks:
|
||||
for hand_landmarks in results.multi_hand_landmarks:
|
||||
mp_drawing.draw_landmarks(
|
||||
image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
|
||||
drawing_styles.get_default_hand_landmark_style(),
|
||||
drawing_styles.get_default_hand_connection_style())
|
||||
image,
|
||||
hand_landmarks,
|
||||
mp_hands.HAND_CONNECTIONS,
|
||||
mp_drawing_styles.get_default_hand_landmarks_style(),
|
||||
mp_drawing_styles.get_default_hand_connections_style())
|
||||
cv2.imshow('MediaPipe Hands', image)
|
||||
if cv2.waitKey(5) & 0xFF == 27:
|
||||
break
|
||||
|
@ -364,6 +368,200 @@ camera.start();
|
|||
</script>
|
||||
```
|
||||
|
||||
### Android Solution API
|
||||
|
||||
Please first follow general
|
||||
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
|
||||
to add MediaPipe Gradle dependencies, then try the Hands solution API in the
|
||||
companion
|
||||
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands)
|
||||
following
|
||||
[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
|
||||
and learn more in the usage example below.
|
||||
|
||||
Supported configuration options:
|
||||
|
||||
* [staticImageMode](#static_image_mode)
|
||||
* [maxNumHands](#max_num_hands)
|
||||
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
|
||||
|
||||
#### Camera Input
|
||||
|
||||
```java
|
||||
// For camera input and result rendering with OpenGL.
|
||||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setMode(HandsOptions.STREAMING_MODE) // API soon to become
|
||||
.setMaxNumHands(1) // setStaticImageMode(false)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
hands.setErrorListener(
|
||||
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
||||
// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
|
||||
CameraInput cameraInput = new CameraInput(this);
|
||||
cameraInput.setNewFrameListener(
|
||||
textureFrame -> hands.send(textureFrame));
|
||||
|
||||
// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
|
||||
// that provides the interfaces to run user-defined OpenGL rendering code.
|
||||
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
|
||||
// as an example.
|
||||
SolutionGlSurfaceView<HandsResult> glSurfaceView =
|
||||
new SolutionGlSurfaceView<>(
|
||||
this, hands.getGlContext(), hands.getGlMajorVersion());
|
||||
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
|
||||
glSurfaceView.setRenderInputImage(true);
|
||||
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
wristLandmark.getX(), wristLandmark.getY()));
|
||||
// Request GL rendering.
|
||||
glSurfaceView.setRenderData(handsResult);
|
||||
glSurfaceView.requestRender();
|
||||
});
|
||||
|
||||
// The runnable to start camera after the GLSurfaceView is attached.
|
||||
glSurfaceView.post(
|
||||
() ->
|
||||
cameraInput.start(
|
||||
this,
|
||||
hands.getGlContext(),
|
||||
CameraInput.CameraFacing.FRONT,
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight()));
|
||||
```
|
||||
|
||||
#### Image Input
|
||||
|
||||
```java
|
||||
// For reading images from gallery and drawing the output in an ImageView.
|
||||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setMode(HandsOptions.STATIC_IMAGE_MODE) // API soon to become
|
||||
.setMaxNumHands(1) // setStaticImageMode(true)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
|
||||
// Connects MediaPipe Hands to the user-defined ImageView instance that allows
|
||||
// users to have the custom drawing of the output landmarks on it.
|
||||
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java
|
||||
// as an example.
|
||||
HandsResultImageView imageView = new HandsResultImageView(this);
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
int width = handsResult.inputBitmap().getWidth();
|
||||
int height = handsResult.inputBitmap().getHeight();
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
|
||||
wristLandmark.getX() * width, wristLandmark.getY() * height));
|
||||
// Request canvas drawing.
|
||||
imageView.setHandsResult(handsResult);
|
||||
runOnUiThread(() -> imageView.update());
|
||||
});
|
||||
hands.setErrorListener(
|
||||
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
||||
// ActivityResultLauncher to get an image from the gallery as Bitmap.
|
||||
ActivityResultLauncher<Intent> imageGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
|
||||
Bitmap bitmap = null;
|
||||
try {
|
||||
bitmap =
|
||||
MediaStore.Images.Media.getBitmap(
|
||||
this.getContentResolver(), resultIntent.getData());
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Bitmap reading error:" + e);
|
||||
}
|
||||
if (bitmap != null) {
|
||||
hands.send(bitmap);
|
||||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
||||
```java
|
||||
// For video input and result rendering with OpenGL.
|
||||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setMode(HandsOptions.STREAMING_MODE) // API soon to become
|
||||
.setMaxNumHands(1) // setStaticImageMode(false)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
hands.setErrorListener(
|
||||
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
||||
// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
|
||||
VideoInput videoInput = new VideoInput(this);
|
||||
videoInput.setNewFrameListener(
|
||||
textureFrame -> hands.send(textureFrame));
|
||||
|
||||
// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
|
||||
// that provides the interfaces to run user-defined OpenGL rendering code.
|
||||
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
|
||||
// as an example.
|
||||
SolutionGlSurfaceView<HandsResult> glSurfaceView =
|
||||
new SolutionGlSurfaceView<>(
|
||||
this, hands.getGlContext(), hands.getGlMajorVersion());
|
||||
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
|
||||
glSurfaceView.setRenderInputImage(true);
|
||||
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
wristLandmark.getX(), wristLandmark.getY()));
|
||||
// Request GL rendering.
|
||||
glSurfaceView.setRenderData(handsResult);
|
||||
glSurfaceView.requestRender();
|
||||
});
|
||||
|
||||
ActivityResultLauncher<Intent> videoGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null) {
|
||||
if (result.getResultCode() == RESULT_OK) {
|
||||
glSurfaceView.post(
|
||||
() ->
|
||||
videoInput.start(
|
||||
this,
|
||||
resultIntent.getData(),
|
||||
hands.getGlContext(),
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight()));
|
||||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
||||
Please first see general instructions for
|
||||
|
|
|
@ -225,6 +225,7 @@ Supported configuration options:
|
|||
import cv2
|
||||
import mediapipe as mp
|
||||
mp_drawing = mp.solutions.drawing_utils
|
||||
mp_drawing_styles = mp.solutions.drawing_styles
|
||||
mp_holistic = mp.solutions.holistic
|
||||
|
||||
# For static images:
|
||||
|
@ -247,13 +248,18 @@ with mp_holistic.Holistic(
|
|||
# Draw pose, left and right hands, and face landmarks on the image.
|
||||
annotated_image = image.copy()
|
||||
mp_drawing.draw_landmarks(
|
||||
annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
|
||||
annotated_image,
|
||||
results.face_landmarks,
|
||||
mp_holistic.FACEMESH_TESSELATION,
|
||||
landmark_drawing_spec=None,
|
||||
connection_drawing_spec=mp_drawing_styles
|
||||
.get_default_face_mesh_tesselation_style())
|
||||
mp_drawing.draw_landmarks(
|
||||
annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
||||
mp_drawing.draw_landmarks(
|
||||
annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
||||
mp_drawing.draw_landmarks(
|
||||
annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
|
||||
annotated_image,
|
||||
results.pose_landmarks,
|
||||
mp_holistic.POSE_CONNECTIONS,
|
||||
landmark_drawing_spec=mp_drawing_styles.
|
||||
get_default_pose_landmarks_style())
|
||||
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
|
||||
# Plot pose world landmarks.
|
||||
mp_drawing.plot_landmarks(
|
||||
|
@ -283,13 +289,18 @@ with mp_holistic.Holistic(
|
|||
image.flags.writeable = True
|
||||
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||
mp_drawing.draw_landmarks(
|
||||
image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
|
||||
image,
|
||||
results.face_landmarks,
|
||||
mp_holistic.FACEMESH_CONTOURS,
|
||||
landmark_drawing_spec=None,
|
||||
connection_drawing_spec=mp_drawing_styles
|
||||
.get_default_face_mesh_contours_style())
|
||||
mp_drawing.draw_landmarks(
|
||||
image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
||||
mp_drawing.draw_landmarks(
|
||||
image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
|
||||
mp_drawing.draw_landmarks(
|
||||
image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
|
||||
image,
|
||||
results.pose_landmarks,
|
||||
mp_holistic.POSE_CONNECTIONS,
|
||||
landmark_drawing_spec=mp_drawing_styles
|
||||
.get_default_pose_landmarks_style())
|
||||
cv2.imshow('MediaPipe Holistic', image)
|
||||
if cv2.waitKey(5) & 0xFF == 27:
|
||||
break
|
||||
|
|
|
@ -30,7 +30,8 @@ overlay of digital content and information on top of the physical world in
|
|||
augmented reality.
|
||||
|
||||
MediaPipe Pose is an ML solution for high-fidelity body pose tracking, inferring
|
||||
33 3D landmarks on the whole body from RGB video frames utilizing our
|
||||
33 3D landmarks and background segmentation mask on the whole body from RGB
|
||||
video frames utilizing our
|
||||
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
|
||||
research that also powers the
|
||||
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).
|
||||
|
@ -49,11 +50,11 @@ The solution utilizes a two-step detector-tracker ML pipeline, proven to be
|
|||
effective in our [MediaPipe Hands](./hands.md) and
|
||||
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
|
||||
first locates the person/pose region-of-interest (ROI) within the frame. The
|
||||
tracker subsequently predicts the pose landmarks within the ROI using the
|
||||
ROI-cropped frame as input. Note that for video use cases the detector is
|
||||
invoked only as needed, i.e., for the very first frame and when the tracker
|
||||
could no longer identify body pose presence in the previous frame. For other
|
||||
frames the pipeline simply derives the ROI from the previous frame’s pose
|
||||
tracker subsequently predicts the pose landmarks and segmentation mask within
|
||||
the ROI using the ROI-cropped frame as input. Note that for video use cases the
|
||||
detector is invoked only as needed, i.e., for the very first frame and when the
|
||||
tracker could no longer identify body pose presence in the previous frame. For
|
||||
other frames the pipeline simply derives the ROI from the previous frame’s pose
|
||||
landmarks.
|
||||
|
||||
The pipeline is implemented as a MediaPipe
|
||||
|
@ -129,16 +130,19 @@ hip midpoints.
|
|||
The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
|
||||
(see figure below).
|
||||
|
||||
Please find more detail in the
|
||||
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
|
||||
this [paper](https://arxiv.org/abs/2006.10204) and
|
||||
[the model card](./models.md#pose), and the attributes in each landmark
|
||||
[below](#pose_landmarks).
|
||||
|
||||
![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) |
|
||||
:----------------------------------------------------------------------------------------------: |
|
||||
*Fig 4. 33 pose landmarks.* |
|
||||
|
||||
Optionally, MediaPipe Pose can predict a full-body
|
||||
[segmentation mask](#segmentation_mask) represented as a two-class segmentation
|
||||
(human or background).
|
||||
|
||||
Please find more detail in the
|
||||
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
|
||||
this [paper](https://arxiv.org/abs/2006.10204),
|
||||
[the model card](./models.md#pose) and the [Output](#Output) section below.
|
||||
|
||||
## Solution APIs
|
||||
|
||||
### Cross-platform Configuration Options
|
||||
|
@ -167,6 +171,18 @@ If set to `true`, the solution filters pose landmarks across different input
|
|||
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
|
||||
is also set to `true`. Defaults to `true`.
|
||||
|
||||
#### enable_segmentation
|
||||
|
||||
If set to `true`, in addition to the pose landmarks the solution also generates
|
||||
the segmentation mask. Defaults to `false`.
|
||||
|
||||
#### smooth_segmentation
|
||||
|
||||
If set to `true`, the solution filters segmentation masks across different input
|
||||
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
|
||||
is `false` or [static_image_mode](#static_image_mode) is `true`. Defaults to
`true`.
|
||||
|
||||
#### min_detection_confidence
|
||||
|
||||
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
|
||||
|
@ -211,6 +227,19 @@ the following:
|
|||
* `visibility`: Identical to that defined in the corresponding
|
||||
[pose_landmarks](#pose_landmarks).
|
||||
|
||||
#### segmentation_mask
|
||||
|
||||
The output segmentation mask, predicted only when
|
||||
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
|
||||
same width and height as the input image, and contains values in `[0.0, 1.0]`
|
||||
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
|
||||
pixel respectively. Please refer to the platform-specific usage examples below
|
||||
for usage details.
|
||||
|
||||
*Fig 6. Example of MediaPipe Pose segmentation mask.* |
|
||||
:-----------------------------------------------------------: |
|
||||
<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_segmentation.mp4" type="video/mp4"></video> |
|
||||
|
||||
### Python Solution API
|
||||
|
||||
Please first follow general [instructions](../getting_started/python.md) to
|
||||
|
@ -222,6 +251,8 @@ Supported configuration options:
|
|||
* [static_image_mode](#static_image_mode)
|
||||
* [model_complexity](#model_complexity)
|
||||
* [smooth_landmarks](#smooth_landmarks)
|
||||
* [enable_segmentation](#enable_segmentation)
|
||||
* [smooth_segmentation](#smooth_segmentation)
|
||||
* [min_detection_confidence](#min_detection_confidence)
|
||||
* [min_tracking_confidence](#min_tracking_confidence)
|
||||
|
||||
|
@ -229,13 +260,16 @@ Supported configuration options:
|
|||
import cv2
|
||||
import mediapipe as mp
|
||||
mp_drawing = mp.solutions.drawing_utils
|
||||
mp_drawing_styles = mp.solutions.drawing_styles
|
||||
mp_pose = mp.solutions.pose
|
||||
|
||||
# For static images:
|
||||
IMAGE_FILES = []
|
||||
BG_COLOR = (192, 192, 192) # gray
|
||||
with mp_pose.Pose(
|
||||
static_image_mode=True,
|
||||
model_complexity=2,
|
||||
enable_segmentation=True,
|
||||
min_detection_confidence=0.5) as pose:
|
||||
for idx, file in enumerate(IMAGE_FILES):
|
||||
image = cv2.imread(file)
|
||||
|
@ -250,10 +284,21 @@ with mp_pose.Pose(
|
|||
f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
|
||||
)
|
||||
# Draw pose landmarks on the image.
|
||||
|
||||
annotated_image = image.copy()
|
||||
# Draw segmentation on the image.
|
||||
# To improve segmentation around boundaries, consider applying a joint
|
||||
# bilateral filter to "results.segmentation_mask" with "image".
|
||||
condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
|
||||
bg_image = np.zeros(image.shape, dtype=np.uint8)
|
||||
bg_image[:] = BG_COLOR
|
||||
annotated_image = np.where(condition, annotated_image, bg_image)
|
||||
# Draw pose landmarks on the image.
|
||||
mp_drawing.draw_landmarks(
|
||||
annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
|
||||
annotated_image,
|
||||
results.pose_landmarks,
|
||||
mp_pose.POSE_CONNECTIONS,
|
||||
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
|
||||
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
|
||||
# Plot pose world landmarks.
|
||||
mp_drawing.plot_landmarks(
|
||||
|
@ -283,7 +328,10 @@ with mp_pose.Pose(
|
|||
image.flags.writeable = True
|
||||
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||
mp_drawing.draw_landmarks(
|
||||
image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
|
||||
image,
|
||||
results.pose_landmarks,
|
||||
mp_pose.POSE_CONNECTIONS,
|
||||
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
|
||||
cv2.imshow('MediaPipe Pose', image)
|
||||
if cv2.waitKey(5) & 0xFF == 27:
|
||||
break
|
||||
|
@ -300,6 +348,8 @@ Supported configuration options:
|
|||
|
||||
* [modelComplexity](#model_complexity)
|
||||
* [smoothLandmarks](#smooth_landmarks)
|
||||
* [enableSegmentation](#enable_segmentation)
|
||||
* [smoothSegmentation](#smooth_segmentation)
|
||||
* [minDetectionConfidence](#min_detection_confidence)
|
||||
* [minTrackingConfidence](#min_tracking_confidence)
|
||||
|
||||
|
@ -340,8 +390,20 @@ function onResults(results) {
|
|||
|
||||
canvasCtx.save();
|
||||
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
|
||||
canvasCtx.drawImage(results.segmentationMask, 0, 0,
|
||||
canvasElement.width, canvasElement.height);
|
||||
|
||||
// Only overwrite existing pixels.
|
||||
canvasCtx.globalCompositeOperation = 'source-in';
|
||||
canvasCtx.fillStyle = '#00FF00';
|
||||
canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);
|
||||
|
||||
// Only overwrite missing pixels.
|
||||
canvasCtx.globalCompositeOperation = 'destination-atop';
|
||||
canvasCtx.drawImage(
|
||||
results.image, 0, 0, canvasElement.width, canvasElement.height);
|
||||
|
||||
canvasCtx.globalCompositeOperation = 'source-over';
|
||||
drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
|
||||
{color: '#00FF00', lineWidth: 4});
|
||||
drawLandmarks(canvasCtx, results.poseLandmarks,
|
||||
|
@ -357,6 +419,8 @@ const pose = new Pose({locateFile: (file) => {
|
|||
pose.setOptions({
|
||||
modelComplexity: 1,
|
||||
smoothLandmarks: true,
|
||||
enableSegmentation: true,
|
||||
smoothSegmentation: true,
|
||||
minDetectionConfidence: 0.5,
|
||||
minTrackingConfidence: 0.5
|
||||
});
|
||||
|
|
|
@ -64,8 +64,9 @@ std::string ToString(GateState state) {
|
|||
// ALLOW or DISALLOW can also be specified as an input side packet. The rules
|
||||
// for evaluation remain the same as above.
|
||||
//
|
||||
// ALLOW/DISALLOW inputs must be specified either using input stream or
|
||||
// via input side packet but not both.
|
||||
// ALLOW/DISALLOW inputs must be specified either using input stream or via
|
||||
// input side packet but not both. If neither is specified, the behavior is then
|
||||
// determined by the "allow" field in the calculator options.
|
||||
//
|
||||
// Intended to be used with the default input stream handler, which synchronizes
|
||||
// all data input streams with the ALLOW/DISALLOW control input stream.
|
||||
|
@ -92,20 +93,22 @@ class GateCalculator : public CalculatorBase {
|
|||
cc->InputSidePackets().HasTag(kDisallowTag);
|
||||
bool input_via_stream =
|
||||
cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag);
|
||||
// Only one of input_side_packet or input_stream may specify ALLOW/DISALLOW
|
||||
// input.
|
||||
RET_CHECK(input_via_side_packet ^ input_via_stream);
|
||||
|
||||
// Only one of input_side_packet or input_stream may specify
|
||||
// ALLOW/DISALLOW input.
|
||||
if (input_via_side_packet) {
|
||||
RET_CHECK(!input_via_stream);
|
||||
RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^
|
||||
cc->InputSidePackets().HasTag(kDisallowTag));
|
||||
|
||||
if (cc->InputSidePackets().HasTag(kAllowTag)) {
|
||||
cc->InputSidePackets().Tag(kAllowTag).Set<bool>();
|
||||
cc->InputSidePackets().Tag(kAllowTag).Set<bool>().Optional();
|
||||
} else {
|
||||
cc->InputSidePackets().Tag(kDisallowTag).Set<bool>();
|
||||
cc->InputSidePackets().Tag(kDisallowTag).Set<bool>().Optional();
|
||||
}
|
||||
} else {
|
||||
}
|
||||
if (input_via_stream) {
|
||||
RET_CHECK(!input_via_side_packet);
|
||||
RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^
|
||||
cc->Inputs().HasTag(kDisallowTag));
|
||||
|
||||
|
@ -139,7 +142,6 @@ class GateCalculator : public CalculatorBase {
|
|||
}
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) final {
|
||||
use_side_packet_for_allow_disallow_ = false;
|
||||
if (cc->InputSidePackets().HasTag(kAllowTag)) {
|
||||
use_side_packet_for_allow_disallow_ = true;
|
||||
allow_by_side_packet_decision_ =
|
||||
|
@@ -158,12 +160,20 @@ class GateCalculator : public CalculatorBase {
    const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
    empty_packets_as_allow_ = options.empty_packets_as_allow();

    if (!use_side_packet_for_allow_disallow_ &&
        !cc->Inputs().HasTag(kAllowTag) && !cc->Inputs().HasTag(kDisallowTag)) {
      use_option_for_allow_disallow_ = true;
      allow_by_option_decision_ = options.allow();
    }

    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) final {
    bool allow = empty_packets_as_allow_;
    if (use_side_packet_for_allow_disallow_) {
    if (use_option_for_allow_disallow_) {
      allow = allow_by_option_decision_;
    } else if (use_side_packet_for_allow_disallow_) {
      allow = allow_by_side_packet_decision_;
    } else {
      if (cc->Inputs().HasTag(kAllowTag) &&
@@ -217,8 +227,10 @@ class GateCalculator : public CalculatorBase {
  GateState last_gate_state_ = GATE_UNINITIALIZED;
  int num_data_streams_;
  bool empty_packets_as_allow_;
  bool use_side_packet_for_allow_disallow_;
  bool use_side_packet_for_allow_disallow_ = false;
  bool allow_by_side_packet_decision_;
  bool use_option_for_allow_disallow_ = false;
  bool allow_by_option_decision_;
};
REGISTER_CALCULATOR(GateCalculator);
@@ -29,4 +29,8 @@ message GateCalculatorOptions {
  // disallowing the corresponding packets in the data input streams. Setting
  // this option to true inverts that, allowing the data packets to go through.
  optional bool empty_packets_as_allow = 1;

  // Whether to allow or disallow the input streams to pass when no
  // ALLOW/DISALLOW input or side input is specified.
  optional bool allow = 2 [default = false];
}
@@ -113,6 +113,68 @@ TEST_F(GateCalculatorTest, InvalidInputs) {
  )")));
}

TEST_F(GateCalculatorTest, AllowByALLOWOptionToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
        options: {
          [mediapipe.GateCalculatorOptions.ext] {
            allow: true
          }
        }
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(2, output.size());
  EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value());
  EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value());
  EXPECT_EQ(true, output[0].Get<bool>());
  EXPECT_EQ(false, output[1].Get<bool>());
}

TEST_F(GateCalculatorTest, DisallowByALLOWOptionSetToFalse) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
        options: {
          [mediapipe.GateCalculatorOptions.ext] {
            allow: false
          }
        }
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(0, output.size());
}

TEST_F(GateCalculatorTest, DisallowByALLOWOptionNotSet) {
  SetRunner(R"(
        calculator: "GateCalculator"
        input_stream: "test_input"
        output_stream: "test_output"
  )");

  constexpr int64 kTimestampValue0 = 42;
  RunTimeStep(kTimestampValue0, true);
  constexpr int64 kTimestampValue1 = 43;
  RunTimeStep(kTimestampValue1, false);

  const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
  ASSERT_EQ(0, output.size());
}

TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"
@@ -661,3 +661,138 @@ cc_test(
        "//mediapipe/framework/port:parse_text_proto",
    ],
)

cc_library(
    name = "affine_transformation",
    hdrs = ["affine_transformation.h"],
    deps = ["@com_google_absl//absl/status:statusor"],
)

cc_library(
    name = "affine_transformation_runner_gl",
    srcs = ["affine_transformation_runner_gl.cc"],
    hdrs = ["affine_transformation_runner_gl.h"],
    deps = [
        ":affine_transformation",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/gpu:gl_calculator_helper",
        "//mediapipe/gpu:gl_simple_shaders",
        "//mediapipe/gpu:gpu_buffer",
        "//mediapipe/gpu:gpu_origin_cc_proto",
        "//mediapipe/gpu:shader_util",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@eigen_archive//:eigen3",
    ],
)

cc_library(
    name = "affine_transformation_runner_opencv",
    srcs = ["affine_transformation_runner_opencv.cc"],
    hdrs = ["affine_transformation_runner_opencv.h"],
    deps = [
        ":affine_transformation",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:ret_check",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status:statusor",
        "@eigen_archive//:eigen3",
    ],
)

mediapipe_proto_library(
    name = "warp_affine_calculator_proto",
    srcs = ["warp_affine_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/gpu:gpu_origin_proto",
    ],
)

cc_library(
    name = "warp_affine_calculator",
    srcs = ["warp_affine_calculator.cc"],
    hdrs = ["warp_affine_calculator.h"],
    visibility = ["//visibility:public"],
    deps = [
        ":affine_transformation",
        ":affine_transformation_runner_opencv",
        ":warp_affine_calculator_cc_proto",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/api2:node",
        "//mediapipe/framework/api2:port",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
    ] + select({
        "//mediapipe/gpu:disable_gpu": [],
        "//conditions:default": [
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
            ":affine_transformation_runner_gl",
        ],
    }),
    alwayslink = 1,
)

cc_test(
    name = "warp_affine_calculator_test",
    srcs = ["warp_affine_calculator_test.cc"],
    data = [
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/input.jpg",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation_border_zero.png",
        "//mediapipe/calculators/tensor:testdata/image_to_tensor/noop_except_range.png",
    ],
    tags = ["desktop_only_test"],
    deps = [
        ":affine_transformation",
        ":warp_affine_calculator",
        "//mediapipe/calculators/image:image_transformation_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_converter",
        "//mediapipe/calculators/tensor:image_to_tensor_utils",
        "//mediapipe/calculators/util:from_image_calculator",
        "//mediapipe/calculators/util:to_image_calculator",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgcodecs",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
        "@com_google_absl//absl/flags:flag",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
    ],
)
mediapipe/calculators/image/affine_transformation.h (new file, 55 lines)
@@ -0,0 +1,55 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_

#include <array>

#include "absl/status/statusor.h"

namespace mediapipe {

class AffineTransformation {
 public:
  // Pixel extrapolation method.
  // When converting image to tensor it may happen that tensor needs to read
  // pixels outside image boundaries. Border mode helps to specify how such
  // pixels will be calculated.
  enum class BorderMode { kZero, kReplicate };

  struct Size {
    int width;
    int height;
  };

  template <typename InputT, typename OutputT>
  class Runner {
   public:
    virtual ~Runner() = default;

    // Transforms input into output using @matrix as following:
    //   output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
    //                        matrix[4] * x + matrix[5] * y + matrix[7])
    // where x and y ranges are defined by @output_size.
    virtual absl::StatusOr<OutputT> Run(const InputT& input,
                                        const std::array<float, 16>& matrix,
                                        const Size& output_size,
                                        BorderMode border_mode) = 0;
  };
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
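Illustrative sketch (not part of the commit): it evaluates the mapping documented in the Runner comment above, showing how an output pixel picks its source location from a row-major 4x4 matrix. The 2x-scale matrix below is a made-up example.

#include <array>
#include <utility>

// Returns the source (x, y) sampled for output pixel (x, y), per the formula
// output(x, y) = input(m[0]*x + m[1]*y + m[3], m[4]*x + m[5]*y + m[7]).
std::pair<float, float> SourceCoords(const std::array<float, 16>& m, float x,
                                     float y) {
  return {m[0] * x + m[1] * y + m[3],   // source x
          m[4] * x + m[5] * y + m[7]};  // source y
}

// Example matrix: scale output coordinates by 2 with no rotation/translation,
// so output pixel (10, 20) samples input location (20, 40).
constexpr std::array<float, 16> kScaleBy2 = {2.0f, 0.0f, 0.0f, 0.0f,  //
                                             0.0f, 2.0f, 0.0f, 0.0f,  //
                                             0.0f, 0.0f, 1.0f, 0.0f,  //
                                             0.0f, 0.0f, 0.0f, 1.0f};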
mediapipe/calculators/image/affine_transformation_runner_gl.cc (new file, 354 lines)
@@ -0,0 +1,354 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"

#include <memory>
#include <optional>

#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/gpu/shader_util.h"

namespace mediapipe {

namespace {

using mediapipe::GlCalculatorHelper;
using mediapipe::GlhCreateProgram;
using mediapipe::GlTexture;
using mediapipe::GpuBuffer;
using mediapipe::GpuOrigin;

bool IsMatrixVerticalFlipNeeded(GpuOrigin::Mode gpu_origin) {
  switch (gpu_origin) {
    case GpuOrigin::DEFAULT:
    case GpuOrigin::CONVENTIONAL:
#ifdef __APPLE__
      return false;
#else
      return true;
#endif  // __APPLE__
    case GpuOrigin::TOP_LEFT:
      return false;
  }
}

#ifdef __APPLE__
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 0
#else
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 1
#endif  // __APPLE__

bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
  return gl_context.gl_major_version() > 3 ||
         (gl_context.gl_major_version() == 3 &&
          gl_context.gl_minor_version() >= 2);
}

constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;

class GlTextureWarpAffineRunner
    : public AffineTransformation::Runner<GpuBuffer,
                                          std::unique_ptr<GpuBuffer>> {
 public:
  GlTextureWarpAffineRunner(std::shared_ptr<GlCalculatorHelper> gl_helper,
                            GpuOrigin::Mode gpu_origin)
      : gl_helper_(gl_helper), gpu_origin_(gpu_origin) {}
  absl::Status Init() {
    return gl_helper_->RunInGlContext([this]() -> absl::Status {
      const GLint attr_location[kNumAttributes] = {
          kAttribVertex,
          kAttribTexturePosition,
      };
      const GLchar* attr_name[kNumAttributes] = {
          "position",
          "texture_coordinate",
      };

      constexpr GLchar kVertShader[] = R"(
            in vec4 position;
            in mediump vec4 texture_coordinate;
            out mediump vec2 sample_coordinate;
            uniform mat4 transform_matrix;

            void main() {
              gl_Position = position;
              vec4 tc = transform_matrix * texture_coordinate;
              sample_coordinate = tc.xy;
            }
          )";

      constexpr GLchar kFragShader[] = R"(
            DEFAULT_PRECISION(mediump, float)
            in vec2 sample_coordinate;
            uniform sampler2D input_texture;

            #ifdef GL_ES
              #define fragColor gl_FragColor
            #else
              out vec4 fragColor;
            #endif  // defined(GL_ES);

            void main() {
              vec4 color = texture2D(input_texture, sample_coordinate);
            #ifdef CUSTOM_ZERO_BORDER_MODE
              float out_of_bounds =
                  float(sample_coordinate.x < 0.0 || sample_coordinate.x > 1.0 ||
                        sample_coordinate.y < 0.0 || sample_coordinate.y > 1.0);
              color = mix(color, vec4(0.0, 0.0, 0.0, 0.0), out_of_bounds);
            #endif  // defined(CUSTOM_ZERO_BORDER_MODE)
              fragColor = color;
            }
          )";

      // Create program and set parameters.
      auto create_fn = [&](const std::string& vs,
                           const std::string& fs) -> absl::StatusOr<Program> {
        GLuint program = 0;
        GlhCreateProgram(vs.c_str(), fs.c_str(), kNumAttributes, &attr_name[0],
                         attr_location, &program);

        RET_CHECK(program) << "Problem initializing warp affine program.";
        glUseProgram(program);
        glUniform1i(glGetUniformLocation(program, "input_texture"), 1);
        GLint matrix_id = glGetUniformLocation(program, "transform_matrix");
        return Program{.id = program, .matrix_id = matrix_id};
      };

      const std::string vert_src =
          absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble, kVertShader);

      const std::string frag_src = absl::StrCat(
          mediapipe::kMediaPipeFragmentShaderPreamble, kFragShader);

      ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));

      auto create_custom_zero_fn = [&]() -> absl::StatusOr<Program> {
        std::string custom_zero_border_mode_def = R"(
              #define CUSTOM_ZERO_BORDER_MODE
            )";
        const std::string frag_custom_zero_src =
            absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
                         custom_zero_border_mode_def, kFragShader);
        return create_fn(vert_src, frag_custom_zero_src);
      };
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
      if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) {
        ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
      }
#else
      ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
#endif  // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED

      glGenFramebuffers(1, &framebuffer_);

      // vertex storage
      glGenBuffers(2, vbo_);
      glGenVertexArrays(1, &vao_);

      // vbo 0
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
                   mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);

      // vbo 1
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
                   mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);

      glBindBuffer(GL_ARRAY_BUFFER, 0);

      return absl::OkStatus();
    });
  }

  absl::StatusOr<std::unique_ptr<GpuBuffer>> Run(
      const GpuBuffer& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    std::unique_ptr<GpuBuffer> gpu_buffer;
    MP_RETURN_IF_ERROR(
        gl_helper_->RunInGlContext([this, &input, &matrix, &size, &border_mode,
                                    &gpu_buffer]() -> absl::Status {
          auto input_texture = gl_helper_->CreateSourceTexture(input);
          auto output_texture = gl_helper_->CreateDestinationTexture(
              size.width, size.height, input.format());

          MP_RETURN_IF_ERROR(
              RunInternal(input_texture, matrix, border_mode, &output_texture));
          gpu_buffer = output_texture.GetFrame<GpuBuffer>();
          return absl::OkStatus();
        }));

    return gpu_buffer;
  }

  absl::Status RunInternal(const GlTexture& texture,
                           const std::array<float, 16>& matrix,
                           AffineTransformation::BorderMode border_mode,
                           GlTexture* output) {
    glDisable(GL_DEPTH_TEST);
    glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
    glViewport(0, 0, output->width(), output->height());

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, output->name());
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           output->name(), 0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(texture.target(), texture.name());

    // a) Filtering.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

    // b) Clamping.
    std::optional<Program> program = program_;
    switch (border_mode) {
      case AffineTransformation::BorderMode::kReplicate: {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        break;
      }
      case AffineTransformation::BorderMode::kZero: {
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
        if (program_custom_zero_) {
          program = program_custom_zero_;
        } else {
          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
          glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR,
                           std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f}.data());
        }
#else
        RET_CHECK(program_custom_zero_)
            << "Program must have been initialized.";
        program = program_custom_zero_;
#endif  // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
        break;
      }
    }
    glUseProgram(program->id);

    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> eigen_mat(matrix.data());
    if (IsMatrixVerticalFlipNeeded(gpu_origin_)) {
      // @matrix describes affine transformation in terms of TOP LEFT origin, so
      // in some cases/on some platforms an extra flipping should be done before
      // and after.
      const Eigen::Matrix<float, 4, 4, Eigen::RowMajor> flip_y(
          {{1.0f, 0.0f, 0.0f, 0.0f},
           {0.0f, -1.0f, 0.0f, 1.0f},
           {0.0f, 0.0f, 1.0f, 0.0f},
           {0.0f, 0.0f, 0.0f, 1.0f}});
      eigen_mat = flip_y * eigen_mat * flip_y;
    }

    // If GL context is ES2, then GL_FALSE must be used for 'transpose'
    // GLboolean in glUniformMatrix4fv, or else INVALID_VALUE error is reported.
    // Hence, transposing the matrix and always passing transposed.
    eigen_mat.transposeInPlace();
    glUniformMatrix4fv(program->matrix_id, 1, GL_FALSE, eigen_mat.data());

    // vao
    glBindVertexArray(vao_);

    // vbo 0
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
    glEnableVertexAttribArray(kAttribVertex);
    glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);

    // vbo 1
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
    glEnableVertexAttribArray(kAttribTexturePosition);
    glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);

    // draw
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // Resetting to MediaPipe texture param defaults.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

    glDisableVertexAttribArray(kAttribVertex);
    glDisableVertexAttribArray(kAttribTexturePosition);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, 0);

    return absl::OkStatus();
  }

  ~GlTextureWarpAffineRunner() override {
    gl_helper_->RunInGlContext([this]() {
      // Release OpenGL resources.
      if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
      if (program_.id != 0) glDeleteProgram(program_.id);
      if (program_custom_zero_ && program_custom_zero_->id != 0) {
        glDeleteProgram(program_custom_zero_->id);
      }
      if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
      glDeleteBuffers(2, vbo_);
    });
  }

 private:
  struct Program {
    GLuint id;
    GLint matrix_id;
  };
  std::shared_ptr<GlCalculatorHelper> gl_helper_;
  GpuOrigin::Mode gpu_origin_;
  GLuint vao_ = 0;
  GLuint vbo_[2] = {0, 0};
  Program program_;
  std::optional<Program> program_custom_zero_;
  GLuint framebuffer_ = 0;
};

#undef GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED

}  // namespace

absl::StatusOr<std::unique_ptr<
    AffineTransformation::Runner<GpuBuffer, std::unique_ptr<GpuBuffer>>>>
CreateAffineTransformationGlRunner(
    std::shared_ptr<GlCalculatorHelper> gl_helper, GpuOrigin::Mode gpu_origin) {
  auto runner =
      absl::make_unique<GlTextureWarpAffineRunner>(gl_helper, gpu_origin);
  MP_RETURN_IF_ERROR(runner->Init());
  return runner;
}

}  // namespace mediapipe
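A standalone sketch (not from the commit; values are made up) of the flip_y conjugation used in RunInternal above: conjugating a top-left-origin transform by flip_y converts it to bottom-left-origin texture space, so a +0.25 y-translation becomes -0.25.

#include <iostream>
#include "Eigen/Core"

int main() {
  using Mat4 = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
  Mat4 flip_y;
  flip_y << 1, 0, 0, 0,
            0, -1, 0, 1,
            0, 0, 1, 0,
            0, 0, 0, 1;
  Mat4 shift = Mat4::Identity();
  shift(1, 3) = 0.25f;  // translate y by +0.25 in top-left-origin coordinates
  Mat4 flipped = flip_y * shift * flip_y;
  std::cout << flipped(1, 3) << "\n";  // prints -0.25
  return 0;
}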
@@ -0,0 +1,36 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_

#include <memory>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"

namespace mediapipe {

absl::StatusOr<std::unique_ptr<AffineTransformation::Runner<
    mediapipe::GpuBuffer, std::unique_ptr<mediapipe::GpuBuffer>>>>
CreateAffineTransformationGlRunner(
    std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper,
    mediapipe::GpuOrigin::Mode gpu_origin);

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
@@ -0,0 +1,160 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"

#include <memory>

#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"

namespace mediapipe {

namespace {

cv::BorderTypes GetBorderModeForOpenCv(
    AffineTransformation::BorderMode border_mode) {
  switch (border_mode) {
    case AffineTransformation::BorderMode::kZero:
      return cv::BORDER_CONSTANT;
    case AffineTransformation::BorderMode::kReplicate:
      return cv::BORDER_REPLICATE;
  }
}

class OpenCvRunner
    : public AffineTransformation::Runner<ImageFrame, ImageFrame> {
 public:
  absl::StatusOr<ImageFrame> Run(
      const ImageFrame& input, const std::array<float, 16>& matrix,
      const AffineTransformation::Size& size,
      AffineTransformation::BorderMode border_mode) override {
    // OpenCV warpAffine works in absolute coordinates, so the transform (which
    // accepts and produces relative coordinates) should be adjusted to first
    // normalize coordinates and then scale them.
    // clang-format off
    cv::Matx44f normalize_dst_coordinate({
        1.0f / size.width, 0.0f,               0.0f, 0.0f,
        0.0f,              1.0f / size.height, 0.0f, 0.0f,
        0.0f,              0.0f,               1.0f, 0.0f,
        0.0f,              0.0f,               0.0f, 1.0f});
    cv::Matx44f scale_src_coordinate({
        1.0f * input.Width(), 0.0f,                  0.0f, 0.0f,
        0.0f,                 1.0f * input.Height(), 0.0f, 0.0f,
        0.0f,                 0.0f,                  1.0f, 0.0f,
        0.0f,                 0.0f,                  0.0f, 1.0f});
    // clang-format on
    cv::Matx44f adjust_dst_coordinate;
    cv::Matx44f adjust_src_coordinate;
    // TODO: update to always use accurate implementation.
    constexpr bool kOpenCvCompatibility = true;
    if (kOpenCvCompatibility) {
      adjust_dst_coordinate = normalize_dst_coordinate;
      adjust_src_coordinate = scale_src_coordinate;
    } else {
      // To do an accurate affine image transformation and make "on-cpu" and
      // "on-gpu" calculations aligned - extra offset is required to select
      // correct pixels.
      //
      // Each destination pixel corresponds to some pixels region from source
      // image. (In case of downscaling there can be more than one pixel.) The
      // offset for x and y is calculated in the way, so pixel in the middle of
      // the region is selected.
      //
      // For simplicity sake, let's consider downscaling from 100x50 to 10x10
      // without a rotation:
      // 1. Each destination pixel corresponds to a 10x5 region
      //      X range: [0, .. , 9]
      //      Y range: [0, .. , 4]
      // 2. Considering we have __discrete__ pixels, the center of the region is
      //    between (4, 2) and (5, 2) pixels, let's assume it's a "pixel"
      //    (4.5, 2).
      // 3. When using the above as an offset for every pixel select while
      //    downscaling, resulting pixels are:
      //      (4.5, 2), (14.5, 2), .. , (94.5, 2)
      //      (4.5, 7), (14.5, 7), .. , (94.5, 7)
      //      ..
      //      (4.5, 47), (14.5, 47), .., (94.5, 47)
      //    instead of:
      //      (0, 0), (10, 0), .. , (90, 0)
      //      (0, 5), (10, 7), .. , (90, 5)
      //      ..
      //      (0, 45), (10, 45), .., (90, 45)
      //    The latter looks shifted.
      //
      // Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to
      // the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence,
      // transformation matrix should shift coordinates by (0.5, 0.5) as the
      // very first step.
      //
      // Due to the above shift, transformed coordinates would be valid for
      // float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0).
      // To make it valid for __discrete__ pixels, transformation matrix should
      // shift coordinate by (-0.5f, -0.5f) as the very last step. (E.g. if we
      // get (0.5f, 0.5f), then it's (0, 0) __discrete__ pixel.)
      // clang-format off
      cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f,
                             0.0f, 1.0f, 0.0f, 0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f,
                             0.0f, 1.0f, 0.0f, -0.5f,
                             0.0f, 0.0f, 1.0f, 0.0f,
                             0.0f, 0.0f, 0.0f, 1.0f});
      // clang-format on
      adjust_dst_coordinate = normalize_dst_coordinate * shift_dst;
      adjust_src_coordinate = shift_src * scale_src_coordinate;
    }

    cv::Matx44f transform(matrix.data());
    cv::Matx44f transform_absolute =
        adjust_src_coordinate * transform * adjust_dst_coordinate;

    cv::Mat in_mat = formats::MatView(&input);

    cv::Mat cv_affine_transform(2, 3, CV_32F);
    cv_affine_transform.at<float>(0, 0) = transform_absolute.val[0];
    cv_affine_transform.at<float>(0, 1) = transform_absolute.val[1];
    cv_affine_transform.at<float>(0, 2) = transform_absolute.val[3];
    cv_affine_transform.at<float>(1, 0) = transform_absolute.val[4];
    cv_affine_transform.at<float>(1, 1) = transform_absolute.val[5];
    cv_affine_transform.at<float>(1, 2) = transform_absolute.val[7];

    ImageFrame out_image(input.Format(), size.width, size.height);
    cv::Mat out_mat = formats::MatView(&out_image);

    cv::warpAffine(in_mat, out_mat, cv_affine_transform,
                   cv::Size(out_mat.cols, out_mat.rows),
                   /*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP,
                   GetBorderModeForOpenCv(border_mode));

    return out_image;
  }
};

}  // namespace

absl::StatusOr<
    std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner() {
  return absl::make_unique<OpenCvRunner>();
}

}  // namespace mediapipe
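A worked example (a sketch, not part of the commit): with the OpenCV-compatibility path above, an identity relative transform from a 100x50 ImageFrame to a 10x10 output collapses to adjust_src * I * adjust_dst = diag(100/10, 50/10), i.e. a dst-to-src scale of (10, 5), which is what WARP_INVERSE_MAP expects since each destination pixel looks up a source pixel.

#include "mediapipe/framework/port/opencv_core_inc.h"

// Returns the 2x3 absolute matrix that the runner would hand to
// cv::warpAffine for an identity relative transform, 100x50 -> 10x10.
cv::Matx23f IdentityAbsoluteTransform() {
  const float src_w = 100.0f, src_h = 50.0f, dst_w = 10.0f, dst_h = 10.0f;
  const float sx = src_w / dst_w;  // 10: dst x -> src x
  const float sy = src_h / dst_h;  // 5:  dst y -> src y
  return cv::Matx23f(sx, 0.0f, 0.0f,
                     0.0f, sy, 0.0f);
}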
@@ -0,0 +1,32 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_

#include <memory>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"

namespace mediapipe {

absl::StatusOr<
    std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner();

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
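Minimal usage sketch under assumptions not in the commit (identity matrix, helper name made up): it creates the OpenCV runner declared above and resizes a frame to a requested output size with zero-padding at the borders.

#include <array>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/status.h"

absl::StatusOr<mediapipe::ImageFrame> ResizeWithOpenCvRunner(
    const mediapipe::ImageFrame& input, int out_width, int out_height) {
  ASSIGN_OR_RETURN(auto runner,
                   mediapipe::CreateAffineTransformationOpenCvRunner());
  // Identity in relative coordinates: the output spans the whole input.
  const std::array<float, 16> matrix = {1.0f, 0.0f, 0.0f, 0.0f,  //
                                        0.0f, 1.0f, 0.0f, 0.0f,  //
                                        0.0f, 0.0f, 1.0f, 0.0f,  //
                                        0.0f, 0.0f, 0.0f, 1.0f};
  mediapipe::AffineTransformation::Size size;
  size.width = out_width;
  size.height = out_height;
  return runner->Run(input, matrix, size,
                     mediapipe::AffineTransformation::BorderMode::kZero);
}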
@@ -262,6 +262,7 @@ absl::Status ScaleImageCalculator::InitializeFrameInfo(CalculatorContext* cc) {
      scale_image::FindOutputDimensions(crop_width_, crop_height_,          //
                                        options_.target_width(),            //
                                        options_.target_height(),           //
                                        options_.target_max_area(),         //
                                        options_.preserve_aspect_ratio(),   //
                                        options_.scale_to_multiple_of(),    //
                                        &output_width_, &output_height_));
@@ -28,6 +28,11 @@ message ScaleImageCalculatorOptions {
  optional int32 target_width = 1;
  optional int32 target_height = 2;

  // If set, then automatically calculates a target_width and target_height that
  // has an area below the target max area. Aspect ratio preservation cannot be
  // disabled.
  optional int32 target_max_area = 15;

  // If true, the image is scaled up or down proportionally so that it
  // fits inside the box represented by target_width and target_height.
  // Otherwise it is scaled to fit target_width and target_height
|
@ -92,12 +92,21 @@ absl::Status FindOutputDimensions(int input_width, //
|
|||
int input_height, //
|
||||
int target_width, //
|
||||
int target_height, //
|
||||
int target_max_area, //
|
||||
bool preserve_aspect_ratio, //
|
||||
int scale_to_multiple_of, //
|
||||
int* output_width, int* output_height) {
|
||||
CHECK(output_width);
|
||||
CHECK(output_height);
|
||||
|
||||
if (target_max_area > 0 && input_width * input_height > target_max_area) {
|
||||
preserve_aspect_ratio = true;
|
||||
target_height = static_cast<int>(sqrt(static_cast<double>(target_max_area) /
|
||||
(static_cast<double>(input_width) /
|
||||
static_cast<double>(input_height))));
|
||||
target_width = -1; // Resize width to preserve aspect ratio.
|
||||
}
|
||||
|
||||
if (preserve_aspect_ratio) {
|
||||
RET_CHECK(scale_to_multiple_of == 2)
|
||||
<< "FindOutputDimensions always outputs width and height that are "
|
||||
|
@ -164,5 +173,17 @@ absl::Status FindOutputDimensions(int input_width, //
|
|||
<< "Unable to set output dimensions based on target dimensions.";
|
||||
}
|
||||
|
||||
absl::Status FindOutputDimensions(int input_width, //
|
||||
int input_height, //
|
||||
int target_width, //
|
||||
int target_height, //
|
||||
bool preserve_aspect_ratio, //
|
||||
int scale_to_multiple_of, //
|
||||
int* output_width, int* output_height) {
|
||||
return FindOutputDimensions(
|
||||
input_width, input_height, target_width, target_height, -1,
|
||||
preserve_aspect_ratio, scale_to_multiple_of, output_width, output_height);
|
||||
}
|
||||
|
||||
} // namespace scale_image
|
||||
} // namespace mediapipe
|
||||
|
|
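A back-of-the-envelope check (not from the commit) of the target_max_area branch above: a 1920x1080 input with target_max_area = 921600 (1280*720) gives target_height = sqrt(921600 / (1920/1080)) = sqrt(518400) = 720, and the aspect-preserving path then derives width 1280, so the output area lands exactly at the cap.

#include <cmath>

// Mirrors the target_height computation in FindOutputDimensions above.
int TargetHeightForMaxArea(int input_width, int input_height,
                           int target_max_area) {
  return static_cast<int>(std::sqrt(static_cast<double>(target_max_area) /
                                    (static_cast<double>(input_width) /
                                     static_cast<double>(input_height))));
}

// TargetHeightForMaxArea(1920, 1080, 921600) == 720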
|
@ -34,15 +34,25 @@ absl::Status FindCropDimensions(int input_width, int input_height, //
|
|||
int* crop_width, int* crop_height, //
|
||||
int* col_start, int* row_start);
|
||||
|
||||
// Given an input width and height, a target width and height, whether to
|
||||
// preserve the aspect ratio, and whether to round-down to the multiple of a
|
||||
// given number nearest to the targets, determine the output width and height.
|
||||
// If target_width or target_height is non-positive, then they will be set to
|
||||
// the input_width and input_height respectively. If scale_to_multiple_of is
|
||||
// less than 1, it will be treated like 1. The output_width and
|
||||
// output_height will be reduced as necessary to preserve_aspect_ratio if the
|
||||
// option is specified. If preserving the aspect ratio is desired, you must set
|
||||
// scale_to_multiple_of to 2.
|
||||
// Given an input width and height, a target width and height or max area,
|
||||
// whether to preserve the aspect ratio, and whether to round-down to the
|
||||
// multiple of a given number nearest to the targets, determine the output width
|
||||
// and height. If target_width or target_height is non-positive, then they will
|
||||
// be set to the input_width and input_height respectively. If target_area is
|
||||
// non-positive, then it will be ignored. If scale_to_multiple_of is less than
|
||||
// 1, it will be treated like 1. The output_width and output_height will be
|
||||
// reduced as necessary to preserve_aspect_ratio if the option is specified. If
|
||||
// preserving the aspect ratio is desired, you must set scale_to_multiple_of
|
||||
// to 2.
|
||||
absl::Status FindOutputDimensions(int input_width, int input_height, //
|
||||
int target_width,
|
||||
int target_height, //
|
||||
int target_max_area, //
|
||||
bool preserve_aspect_ratio, //
|
||||
int scale_to_multiple_of, //
|
||||
int* output_width, int* output_height);
|
||||
|
||||
// Backwards compatible helper.
|
||||
absl::Status FindOutputDimensions(int input_width, int input_height, //
|
||||
int target_width,
|
||||
int target_height, //
|
||||
|
|
|
@@ -79,49 +79,49 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsPreserveRatio) {
  int output_width;
  int output_height;
  // Not scale.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(100, output_height);
  // Not scale with odd input size.
  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, false, 1, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, -1, false, 1,
                                    &output_width, &output_height));
  EXPECT_EQ(201, output_width);
  EXPECT_EQ(101, output_height);
  // Scale down by 1/2.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale up, doubling dimensions.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(400, output_width);
  EXPECT_EQ(200, output_height);
  // Fits a 2:1 image into a 150 x 150 box. Output dimensions are always
  // divisible by 2.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(150, output_width);
  EXPECT_EQ(74, output_height);
  // Fits a 2:1 image into a 400 x 50 box.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale to multiple number with odd target size.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale to multiple number with odd target size.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale to odd size.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, false, 1, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, -1, false, 1,
                                    &output_width, &output_height));
  EXPECT_EQ(151, output_width);
  EXPECT_EQ(101, output_height);
}

@@ -131,18 +131,18 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsNoAspectRatio) {
  int output_width;
  int output_height;
  // Scale width only.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(100, output_height);
  // Scale height only.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, false, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, false, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(200, output_height);
  // Scale both dimensions.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(150, output_width);
  EXPECT_EQ(200, output_height);
}

@@ -152,41 +152,78 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsDownScaleToMultipleOf) {
  int output_width;
  int output_height;
  // Set no targets, downscale to a multiple of 8.
  MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, false, 8, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, -1, false, 8,
                                    &output_width, &output_height));
  EXPECT_EQ(96, output_width);
  EXPECT_EQ(96, output_height);
  // Set width target, downscale to a multiple of 8.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 8, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 8,
                                    &output_width, &output_height));
  EXPECT_EQ(96, output_width);
  EXPECT_EQ(96, output_height);
  // Set height target, downscale to a multiple of 8.
  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, false, 8, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, -1, false, 8,
                                    &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(200, output_height);
  // Set both targets, downscale to a multiple of 8.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 8, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 8,
                                    &output_width, &output_height));
  EXPECT_EQ(144, output_width);
  EXPECT_EQ(200, output_height);
  // Doesn't throw error if keep aspect is true and downscale multiple is 2.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, true, 2, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, -1, true, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(400, output_width);
  EXPECT_EQ(200, output_height);
  // Throws error if keep aspect is true, but downscale multiple is not 2.
  ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, true, 4, &output_width,
                                   &output_height),
  ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, -1, true, 4,
                                   &output_width, &output_height),
              testing::Not(testing::status::IsOk()));
  // Downscaling to multiple ignored if multiple is less than 2.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, false, 1, &output_width,
                                    &output_height));
  MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, -1, false, 1,
                                    &output_width, &output_height));
  EXPECT_EQ(401, output_width);
  EXPECT_EQ(201, output_height);
}

// Tests scaling down to stay under a maximum area.
TEST(ScaleImageUtilsTest, FindOutputDimensionsMaxArea) {
  int output_width;
  int output_height;
  // Smaller area.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 9000, false, 2,
                                    &output_width, &output_height));
  EXPECT_NEAR(
      200 / 100,
      static_cast<double>(output_width) / static_cast<double>(output_height),
      0.1f);
  EXPECT_LE(output_width * output_height, 9000);
  // Close to original area.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 19999, false, 2,
                                    &output_width, &output_height));
  EXPECT_NEAR(
      200.0 / 100.0,
      static_cast<double>(output_width) / static_cast<double>(output_height),
      0.1f);
  EXPECT_LE(output_width * output_height, 19999);
  // Don't scale with larger area.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20001, false, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(100, output_height);
  // Don't scale with equal area.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20000, false, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(100, output_height);
  // Don't scale at all.
  MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, false, 2,
                                    &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(100, output_height);
}

}  // namespace
}  // namespace scale_image
}  // namespace mediapipe
mediapipe/calculators/image/warp_affine_calculator.cc (new file, 211 lines)
@@ -0,0 +1,211 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/image/warp_affine_calculator.h"

#include <array>
#include <cstdint>
#include <memory>

#include "mediapipe/calculators/image/affine_transformation.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#endif  // !MEDIAPIPE_DISABLE_GPU
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/calculators/image/warp_affine_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/ret_check.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

namespace {

AffineTransformation::BorderMode GetBorderMode(
    mediapipe::WarpAffineCalculatorOptions::BorderMode border_mode) {
  switch (border_mode) {
    case mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO:
      return AffineTransformation::BorderMode::kZero;
    case mediapipe::WarpAffineCalculatorOptions::BORDER_UNSPECIFIED:
    case mediapipe::WarpAffineCalculatorOptions::BORDER_REPLICATE:
      return AffineTransformation::BorderMode::kReplicate;
  }
}

template <typename ImageT>
class WarpAffineRunnerHolder {};

template <>
class WarpAffineRunnerHolder<ImageFrame> {
 public:
  using RunnerType = AffineTransformation::Runner<ImageFrame, ImageFrame>;
  absl::Status Open(CalculatorContext* cc) { return absl::OkStatus(); }
  absl::StatusOr<RunnerType*> GetRunner() {
    if (!runner_) {
      ASSIGN_OR_RETURN(runner_, CreateAffineTransformationOpenCvRunner());
    }
    return runner_.get();
  }

 private:
  std::unique_ptr<RunnerType> runner_;
};

#if !MEDIAPIPE_DISABLE_GPU
template <>
class WarpAffineRunnerHolder<mediapipe::GpuBuffer> {
 public:
  using RunnerType =
      AffineTransformation::Runner<mediapipe::GpuBuffer,
                                   std::unique_ptr<mediapipe::GpuBuffer>>;
  absl::Status Open(CalculatorContext* cc) {
    gpu_origin_ =
        cc->Options<mediapipe::WarpAffineCalculatorOptions>().gpu_origin();
    gl_helper_ = std::make_shared<mediapipe::GlCalculatorHelper>();
    return gl_helper_->Open(cc);
  }
  absl::StatusOr<RunnerType*> GetRunner() {
    if (!runner_) {
      ASSIGN_OR_RETURN(
          runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_));
    }
    return runner_.get();
  }

 private:
  mediapipe::GpuOrigin::Mode gpu_origin_;
  std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper_;
  std::unique_ptr<RunnerType> runner_;
};
#endif  // !MEDIAPIPE_DISABLE_GPU

template <>
class WarpAffineRunnerHolder<mediapipe::Image> {
 public:
  absl::Status Open(CalculatorContext* cc) { return runner_.Open(cc); }
  absl::StatusOr<
      AffineTransformation::Runner<mediapipe::Image, mediapipe::Image>*>
  GetRunner() {
    return &runner_;
  }

 private:
  class Runner : public AffineTransformation::Runner<mediapipe::Image,
                                                     mediapipe::Image> {
   public:
    absl::Status Open(CalculatorContext* cc) {
      MP_RETURN_IF_ERROR(cpu_holder_.Open(cc));
#if !MEDIAPIPE_DISABLE_GPU
      MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU
      return absl::OkStatus();
    }
    absl::StatusOr<mediapipe::Image> Run(
        const mediapipe::Image& input, const std::array<float, 16>& matrix,
        const AffineTransformation::Size& size,
        AffineTransformation::BorderMode border_mode) override {
      if (input.UsesGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
        ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
        ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix,
                                                  size, border_mode));
        return mediapipe::Image(*result);
#else
        return absl::UnavailableError("GPU support is disabled");
#endif  // !MEDIAPIPE_DISABLE_GPU
      }
      ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
      const auto& frame_ptr = input.GetImageFrameSharedPtr();
      // Wrap image into image frame.
      const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(),
                                   frame_ptr->Height(), frame_ptr->WidthStep(),
                                   const_cast<uint8_t*>(frame_ptr->PixelData()),
                                   [](uint8* data) {});
      ASSIGN_OR_RETURN(auto result,
                       runner->Run(image_frame, matrix, size, border_mode));
      return mediapipe::Image(std::make_shared<ImageFrame>(std::move(result)));
    }

   private:
    WarpAffineRunnerHolder<ImageFrame> cpu_holder_;
#if !MEDIAPIPE_DISABLE_GPU
    WarpAffineRunnerHolder<mediapipe::GpuBuffer> gpu_holder_;
#endif  // !MEDIAPIPE_DISABLE_GPU
  };

  Runner runner_;
};

template <typename InterfaceT>
class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
 public:
#if !MEDIAPIPE_DISABLE_GPU
  static absl::Status UpdateContract(CalculatorContract* cc) {
    if constexpr (std::is_same_v<InterfaceT, WarpAffineCalculatorGpu> ||
                  std::is_same_v<InterfaceT, WarpAffineCalculator>) {
      MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
    }
    return absl::OkStatus();
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); }

  absl::Status Process(CalculatorContext* cc) override {
    if (InterfaceT::kInImage(cc).IsEmpty() ||
        InterfaceT::kMatrix(cc).IsEmpty() ||
        InterfaceT::kOutputSize(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    const std::array<float, 16>& transform = *InterfaceT::kMatrix(cc);
    auto [out_width, out_height] = *InterfaceT::kOutputSize(cc);
    AffineTransformation::Size output_size;
    output_size.width = out_width;
    output_size.height = out_height;
    ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
    ASSIGN_OR_RETURN(
        auto result,
        runner->Run(
            *InterfaceT::kInImage(cc), transform, output_size,
            GetBorderMode(cc->Options<mediapipe::WarpAffineCalculatorOptions>()
                              .border_mode())));
    InterfaceT::kOutImage(cc).Send(std::move(result));

    return absl::OkStatus();
  }

 private:
  WarpAffineRunnerHolder<typename decltype(InterfaceT::kInImage)::PayloadT>
      holder_;
};

}  // namespace

MEDIAPIPE_NODE_IMPLEMENTATION(
    WarpAffineCalculatorImpl<WarpAffineCalculatorCpu>);
#if !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(
    WarpAffineCalculatorImpl<WarpAffineCalculatorGpu>);
#endif  // !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(WarpAffineCalculatorImpl<WarpAffineCalculator>);

}  // namespace mediapipe
94  mediapipe/calculators/image/warp_affine_calculator.h  Normal file
@@ -0,0 +1,94 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

// Runs affine transformation.
//
// Input:
//   IMAGE - Image/ImageFrame/GpuBuffer
//
//   MATRIX - std::array<float, 16>
//     Used as following:
//       output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
//                            matrix[4] * x + matrix[5] * y + matrix[7])
//     where x and y ranges are defined by @OUTPUT_SIZE.
//
//   OUTPUT_SIZE - std::pair<int, int>
//     Size of the output image.
//
// Output:
//   IMAGE - Image/ImageFrame/GpuBuffer
//
// Note:
//   - Output image type and format are the same as the input one.
//
// Usage example:
//   node {
//     calculator: "WarpAffineCalculator(Cpu|Gpu)"
//     input_stream: "IMAGE:image"
//     input_stream: "MATRIX:matrix"
//     input_stream: "OUTPUT_SIZE:size"
//     output_stream: "IMAGE:transformed_image"
//     options: {
//       [mediapipe.WarpAffineCalculatorOptions.ext] {
//         border_mode: BORDER_ZERO
//       }
//     }
//   }
template <typename ImageT>
class WarpAffineCalculatorIntf : public mediapipe::api2::NodeIntf {
 public:
  static constexpr mediapipe::api2::Input<ImageT> kInImage{"IMAGE"};
  static constexpr mediapipe::api2::Input<std::array<float, 16>> kMatrix{
      "MATRIX"};
  static constexpr mediapipe::api2::Input<std::pair<int, int>> kOutputSize{
      "OUTPUT_SIZE"};
  static constexpr mediapipe::api2::Output<ImageT> kOutImage{"IMAGE"};
};

class WarpAffineCalculatorCpu : public WarpAffineCalculatorIntf<ImageFrame> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorCpu, kInImage, kMatrix,
                           kOutputSize, kOutImage);
};
#if !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculatorGpu
    : public WarpAffineCalculatorIntf<mediapipe::GpuBuffer> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorGpu, kInImage, kMatrix,
                           kOutputSize, kOutImage);
};
#endif  // !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculator : public WarpAffineCalculatorIntf<mediapipe::Image> {
 public:
  MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculator, kInImage, kMatrix, kOutputSize,
                           kOutImage);
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
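The MATRIX semantics documented above can be exercised in isolation. Below is a minimal C++ sketch, not part of this change and with an illustrative helper name, that applies such a row-major 4x4 matrix to an output pixel to find the input coordinate it samples from:

#include <array>
#include <utility>

// Sketch only: applies the affine part of the row-major 4x4 matrix described
// in the header comment, i.e.
//   output(x, y) = input(m[0]*x + m[1]*y + m[3], m[4]*x + m[5]*y + m[7]).
std::pair<float, float> MapOutputToInput(const std::array<float, 16>& m,
                                         float x, float y) {
  return {m[0] * x + m[1] * y + m[3], m[4] * x + m[5] * y + m[7]};
}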
46  mediapipe/calculators/image/warp_affine_calculator.proto  Normal file
@@ -0,0 +1,46 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";

message WarpAffineCalculatorOptions {
  extend CalculatorOptions {
    optional WarpAffineCalculatorOptions ext = 373693895;
  }

  // Pixel extrapolation methods. See @border_mode.
  enum BorderMode {
    BORDER_UNSPECIFIED = 0;
    BORDER_ZERO = 1;
    BORDER_REPLICATE = 2;
  }

  // Pixel extrapolation method.
  // When applying the affine transformation, the calculator may need to read
  // pixels outside the input image boundaries. The border mode specifies how
  // such pixels are calculated.
  //
  // BORDER_REPLICATE is used by default.
  optional BorderMode border_mode = 1;

  // For CONVENTIONAL mode for OpenGL, the input image starts at the bottom and
  // needs to be flipped vertically, as tensors are expected to start at the
  // top. (DEFAULT or unset is interpreted as CONVENTIONAL.)
  optional GpuOrigin.Mode gpu_origin = 2;
}
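For reference, the two options above can be combined; the node config below is illustrative only (stream names are placeholders, gpu_origin only matters on the GPU path), embedded as a raw string the way the tests later in this change embed graph text protos:

constexpr char kExampleWarpAffineNode[] = R"pb(
  node {
    calculator: "WarpAffineCalculatorGpu"
    input_stream: "IMAGE:input_image_gpu"
    input_stream: "MATRIX:matrix"
    input_stream: "OUTPUT_SIZE:output_size"
    output_stream: "IMAGE:output_image_gpu"
    options {
      [mediapipe.WarpAffineCalculatorOptions.ext] {
        border_mode: BORDER_ZERO
        gpu_origin: TOP_LEFT
      }
    }
  }
)pb";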
615  mediapipe/calculators/image/warp_affine_calculator_test.cc  Normal file
@@ -0,0 +1,615 @@
// Copyright 2021 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/flags/flag.h"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/substitute.h"
|
||||
#include "mediapipe/calculators/image/affine_transformation.h"
|
||||
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
|
||||
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_runner.h"
|
||||
#include "mediapipe/framework/deps/file_path.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/image_format.pb.h"
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/formats/image_frame_opencv.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/integral_types.h"
|
||||
#include "mediapipe/framework/port/opencv_core_inc.h"
|
||||
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
||||
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
|
||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
cv::Mat GetRgb(absl::string_view path) {
|
||||
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
|
||||
cv::Mat rgb(bgr.rows, bgr.cols, CV_8UC3);
|
||||
int from_to[] = {0, 2, 1, 1, 2, 0};
|
||||
cv::mixChannels(&bgr, 1, &rgb, 1, from_to, 3);
|
||||
return rgb;
|
||||
}
|
||||
|
||||
cv::Mat GetRgba(absl::string_view path) {
  cv::Mat bgr = cv::imread(file::JoinPath("./", path));
  cv::Mat rgba(bgr.rows, bgr.cols, CV_8UC4, cv::Scalar(0, 0, 0, 0));
  // Copy BGR into the first three channels of the RGBA destination, swapping
  // B and R; the alpha channel stays at 0 as initialized above.
  int from_to[] = {0, 2, 1, 1, 2, 0};
  cv::mixChannels(&bgr, 1, &rgba, 1, from_to, 3);
  return rgba;
}
|
||||
|
||||
// Test template.
|
||||
// No processing/assertions should be done after the function is invoked.
|
||||
void RunTest(const std::string& graph_text, const std::string& tag,
|
||||
const cv::Mat& input, cv::Mat expected_result,
|
||||
float similarity_threshold, std::array<float, 16> matrix,
|
||||
int out_width, int out_height,
|
||||
absl::optional<AffineTransformation::BorderMode> border_mode) {
|
||||
std::string border_mode_str;
|
||||
if (border_mode) {
|
||||
switch (*border_mode) {
|
||||
case AffineTransformation::BorderMode::kReplicate:
|
||||
border_mode_str = "border_mode: BORDER_REPLICATE";
|
||||
break;
|
||||
case AffineTransformation::BorderMode::kZero:
|
||||
border_mode_str = "border_mode: BORDER_ZERO";
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
||||
absl::Substitute(graph_text, /*$0=*/border_mode_str));
|
||||
|
||||
std::vector<Packet> output_packets;
|
||||
tool::AddVectorSink("output_image", &graph_config, &output_packets);
|
||||
|
||||
// Run the graph.
|
||||
CalculatorGraph graph;
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
|
||||
ImageFrame input_image(
|
||||
input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
|
||||
input.cols, input.rows, input.step, input.data, [](uint8*) {});
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"input_image",
|
||||
MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"matrix",
|
||||
MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"output_size", MakePacket<std::pair<int, int>>(
|
||||
std::pair<int, int>(out_width, out_height))
|
||||
.At(Timestamp(0))));
|
||||
|
||||
MP_ASSERT_OK(graph.WaitUntilIdle());
|
||||
ASSERT_THAT(output_packets, testing::SizeIs(1));
|
||||
|
||||
// Get and process results.
|
||||
const ImageFrame& out_frame = output_packets[0].Get<ImageFrame>();
|
||||
cv::Mat result = formats::MatView(&out_frame);
|
||||
double similarity =
|
||||
1.0 - cv::norm(result, expected_result, cv::NORM_RELATIVE | cv::NORM_L2);
|
||||
EXPECT_GE(similarity, similarity_threshold);
|
||||
|
||||
// Fully close graph at end, otherwise calculator+tensors are destroyed
|
||||
// after calling WaitUntilDone().
|
||||
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
|
||||
MP_ASSERT_OK(graph.CloseInputStream("matrix"));
|
||||
MP_ASSERT_OK(graph.CloseInputStream("output_size"));
|
||||
MP_ASSERT_OK(graph.WaitUntilDone());
|
||||
}
|
||||
|
||||
enum class InputType { kImageFrame, kImage };
|
||||
|
||||
// Similarity is checked against OpenCV results always, and due to differences
|
||||
// on how OpenCV and GL treats pixels there are two thresholds.
|
||||
// TODO: update to have just one threshold when OpenCV
|
||||
// implementation is updated.
|
||||
struct SimilarityConfig {
|
||||
double threshold_on_cpu;
|
||||
double threshold_on_gpu;
|
||||
};
|
||||
|
||||
void RunTest(cv::Mat input, cv::Mat expected_result,
|
||||
const SimilarityConfig& similarity, std::array<float, 16> matrix,
|
||||
int out_width, int out_height,
|
||||
absl::optional<AffineTransformation::BorderMode> border_mode) {
|
||||
RunTest(R"(
|
||||
input_stream: "input_image"
|
||||
input_stream: "output_size"
|
||||
input_stream: "matrix"
|
||||
node {
|
||||
calculator: "WarpAffineCalculatorCpu"
|
||||
input_stream: "IMAGE:input_image"
|
||||
input_stream: "MATRIX:matrix"
|
||||
input_stream: "OUTPUT_SIZE:output_size"
|
||||
output_stream: "IMAGE:output_image"
|
||||
options {
|
||||
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||
$0 # border mode
|
||||
}
|
||||
}
|
||||
}
|
||||
)",
|
||||
"cpu", input, expected_result, similarity.threshold_on_cpu, matrix,
|
||||
out_width, out_height, border_mode);
|
||||
|
||||
RunTest(R"(
|
||||
input_stream: "input_image"
|
||||
input_stream: "output_size"
|
||||
input_stream: "matrix"
|
||||
node {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_CPU:input_image"
|
||||
output_stream: "IMAGE:input_image_unified"
|
||||
}
|
||||
node {
|
||||
calculator: "WarpAffineCalculator"
|
||||
input_stream: "IMAGE:input_image_unified"
|
||||
input_stream: "MATRIX:matrix"
|
||||
input_stream: "OUTPUT_SIZE:output_size"
|
||||
output_stream: "IMAGE:output_image_unified"
|
||||
options {
|
||||
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||
$0 # border mode
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:output_image_unified"
|
||||
output_stream: "IMAGE_CPU:output_image"
|
||||
}
|
||||
)",
|
||||
"cpu_image", input, expected_result, similarity.threshold_on_cpu,
|
||||
matrix, out_width, out_height, border_mode);
|
||||
|
||||
RunTest(R"(
|
||||
input_stream: "input_image"
|
||||
input_stream: "output_size"
|
||||
input_stream: "matrix"
|
||||
node {
|
||||
calculator: "ImageFrameToGpuBufferCalculator"
|
||||
input_stream: "input_image"
|
||||
output_stream: "input_image_gpu"
|
||||
}
|
||||
node {
|
||||
calculator: "WarpAffineCalculatorGpu"
|
||||
input_stream: "IMAGE:input_image_gpu"
|
||||
input_stream: "MATRIX:matrix"
|
||||
input_stream: "OUTPUT_SIZE:output_size"
|
||||
output_stream: "IMAGE:output_image_gpu"
|
||||
options {
|
||||
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||
$0 # border mode
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
calculator: "GpuBufferToImageFrameCalculator"
|
||||
input_stream: "output_image_gpu"
|
||||
output_stream: "output_image"
|
||||
}
|
||||
)",
|
||||
"gpu", input, expected_result, similarity.threshold_on_gpu, matrix,
|
||||
out_width, out_height, border_mode);
|
||||
|
||||
RunTest(R"(
|
||||
input_stream: "input_image"
|
||||
input_stream: "output_size"
|
||||
input_stream: "matrix"
|
||||
node {
|
||||
calculator: "ImageFrameToGpuBufferCalculator"
|
||||
input_stream: "input_image"
|
||||
output_stream: "input_image_gpu"
|
||||
}
|
||||
node {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_GPU:input_image_gpu"
|
||||
output_stream: "IMAGE:input_image_unified"
|
||||
}
|
||||
node {
|
||||
calculator: "WarpAffineCalculator"
|
||||
input_stream: "IMAGE:input_image_unified"
|
||||
input_stream: "MATRIX:matrix"
|
||||
input_stream: "OUTPUT_SIZE:output_size"
|
||||
output_stream: "IMAGE:output_image_unified"
|
||||
options {
|
||||
[mediapipe.WarpAffineCalculatorOptions.ext] {
|
||||
$0 # border mode
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
node {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:output_image_unified"
|
||||
output_stream: "IMAGE_GPU:output_image_gpu"
|
||||
}
|
||||
node {
|
||||
calculator: "GpuBufferToImageFrameCalculator"
|
||||
input_stream: "output_image_gpu"
|
||||
output_stream: "output_image"
|
||||
}
|
||||
)",
|
||||
"gpu_image", input, expected_result, similarity.threshold_on_gpu,
|
||||
matrix, out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
std::array<float, 16> GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi,
|
||||
bool keep_aspect_ratio, int out_width,
|
||||
int out_height) {
|
||||
std::array<float, 16> transform_mat;
|
||||
mediapipe::RotatedRect roi_absolute =
|
||||
mediapipe::GetRoi(input.cols, input.rows, roi);
|
||||
mediapipe::PadRoi(out_width, out_height, keep_aspect_ratio, &roi_absolute)
|
||||
.IgnoreError();
|
||||
mediapipe::GetRotatedSubRectToRectTransformMatrix(
|
||||
roi_absolute, input.cols, input.rows,
|
||||
/*flip_horizontaly=*/false, &transform_mat);
|
||||
return transform_mat;
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspect) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.65f);
|
||||
roi.set_y_center(0.4f);
|
||||
roi.set_width(0.5f);
|
||||
roi.set_height(0.5f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png");
|
||||
int out_width = 256;
|
||||
int out_height = 256;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode = {};
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.82},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.65f);
|
||||
roi.set_y_center(0.4f);
|
||||
roi.set_width(0.5f);
|
||||
roi.set_height(0.5f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"medium_sub_rect_keep_aspect_border_zero.png");
|
||||
int out_width = 256;
|
||||
int out_height = 256;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotation) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.65f);
|
||||
roi.set_y_center(0.4f);
|
||||
roi.set_width(0.5f);
|
||||
roi.set_height(0.5f);
|
||||
roi.set_rotation(M_PI * 90.0f / 180.0f);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"medium_sub_rect_keep_aspect_with_rotation.png");
|
||||
int out_width = 256;
|
||||
int out_height = 256;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kReplicate;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.77},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotationBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.65f);
|
||||
roi.set_y_center(0.4f);
|
||||
roi.set_width(0.5f);
|
||||
roi.set_height(0.5f);
|
||||
roi.set_rotation(M_PI * 90.0f / 180.0f);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png");
|
||||
int out_width = 256;
|
||||
int out_height = 256;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.75},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotation) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.65f);
|
||||
roi.set_y_center(0.4f);
|
||||
roi.set_width(0.5f);
|
||||
roi.set_height(0.5f);
|
||||
roi.set_rotation(M_PI * -45.0f / 180.0f);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png");
|
||||
int out_width = 256;
|
||||
int out_height = 256;
|
||||
bool keep_aspect_ratio = false;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kReplicate;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotationBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.65f);
|
||||
roi.set_y_center(0.4f);
|
||||
roi.set_width(0.5f);
|
||||
roi.set_height(0.5f);
|
||||
roi.set_rotation(M_PI * -45.0f / 180.0f);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"medium_sub_rect_with_rotation_border_zero.png");
|
||||
int out_width = 256;
|
||||
int out_height = 256;
|
||||
bool keep_aspect_ratio = false;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.80},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, LargeSubRect) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.5f);
|
||||
roi.set_height(1.1f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/large_sub_rect.png");
|
||||
int out_width = 128;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = false;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kReplicate;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.95},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, LargeSubRectBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.5f);
|
||||
roi.set_height(1.1f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png");
|
||||
int out_width = 128;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = false;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.92},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspect) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.5f);
|
||||
roi.set_height(1.1f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png");
|
||||
int out_width = 128;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kReplicate;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.5f);
|
||||
roi.set_height(1.1f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgb(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"large_sub_rect_keep_aspect_border_zero.png");
|
||||
int out_width = 128;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotation) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.5f);
|
||||
roi.set_height(1.1f);
|
||||
roi.set_rotation(M_PI * -15.0f / 180.0f);
|
||||
auto input = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"large_sub_rect_keep_aspect_with_rotation.png");
|
||||
int out_width = 128;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode = {};
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.91},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotationBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.5f);
|
||||
roi.set_height(1.1f);
|
||||
roi.set_rotation(M_PI * -15.0f / 180.0f);
|
||||
auto input = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/"
|
||||
"large_sub_rect_keep_aspect_with_rotation_border_zero.png");
|
||||
int out_width = 128;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.88},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, NoOp) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.0f);
|
||||
roi.set_height(1.0f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/noop_except_range.png");
|
||||
int out_width = 64;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kReplicate;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
TEST(WarpAffineCalculatorTest, NoOpBorderZero) {
|
||||
mediapipe::NormalizedRect roi;
|
||||
roi.set_x_center(0.5f);
|
||||
roi.set_y_center(0.5f);
|
||||
roi.set_width(1.0f);
|
||||
roi.set_height(1.0f);
|
||||
roi.set_rotation(0);
|
||||
auto input = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/input.jpg");
|
||||
auto expected_output = GetRgba(
|
||||
"/mediapipe/calculators/"
|
||||
"tensor/testdata/image_to_tensor/noop_except_range.png");
|
||||
int out_width = 64;
|
||||
int out_height = 128;
|
||||
bool keep_aspect_ratio = true;
|
||||
std::optional<AffineTransformation::BorderMode> border_mode =
|
||||
AffineTransformation::BorderMode::kZero;
|
||||
RunTest(input, expected_output,
|
||||
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
|
||||
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
|
||||
out_width, out_height, border_mode);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
@@ -26,6 +26,11 @@ licenses(["notice"])

package(default_visibility = ["//visibility:private"])

exports_files(
    glob(["testdata/image_to_tensor/*"]),
    visibility = ["//mediapipe/calculators/image:__subpackages__"],
)

selects.config_setting_group(
    name = "compute_shader_unavailable",
    match_any = [
@@ -87,9 +87,9 @@ using GpuBuffer = mediapipe::GpuBuffer;
//   TENSORS - std::vector<Tensor>
//     Vector containing a single Tensor populated with an extracted RGB image.
//   MATRIX - std::array<float, 16> @Optional
//     An std::array<float, 16> representing a 4x4 row-major-order matrix which
//     can be used to map a point on the output tensor to a point on the input
//     image.
//     An std::array<float, 16> representing a 4x4 row-major-order matrix that
//     maps a point on the input image to a point on the output tensor, and
//     can be used to reverse the mapping by inverting the matrix.
//   LETTERBOX_PADDING - std::array<float, 4> @Optional
//     An std::array<float, 4> representing the letterbox padding from the 4
//     sides ([left, top, right, bottom]) of the output image, normalized to
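The updated documentation above implies that the reverse mapping is obtained by inverting the matrix. A minimal Eigen-based sketch, not part of this change, which mirrors what the InverseMatrixCalculator added elsewhere in this import does and assumes the matrix is invertible:

#include <array>
#include "Eigen/Core"
#include "Eigen/LU"

// Sketch only: inverts a row-major 4x4 matrix so the tensor-to-image mapping
// can be reversed. Behavior is undefined if the matrix is singular.
std::array<float, 16> InvertRowMajor4x4(const std::array<float, 16>& m) {
  Eigen::Matrix<float, 4, 4, Eigen::RowMajor> matrix(m.data());
  Eigen::Matrix<float, 4, 4, Eigen::RowMajor> inverse = matrix.inverse();
  std::array<float, 16> out;
  Eigen::Map<Eigen::Matrix<float, 4, 4, Eigen::RowMajor>>(out.data()) = inverse;
  return out;
}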
@@ -517,8 +517,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
  uniform sampler2D frame;

  void main() {
    $1 // flip
    vec4 pixel = texture2D(frame, sample_coordinate);
    vec2 coord = $1
    vec4 pixel = texture2D(frame, coord);
    $2 // normalize [-1,1]
    fragColor.r = pixel.r;  // r channel
    $3 // g & b channels

@@ -526,8 +526,9 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
  })",
          /*$0=*/single_channel ? "vec1" : "vec4",
          /*$1=*/
          flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;"
                           : "",
          flip_vertically_
              ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
              : "sample_coordinate;",
          /*$2=*/output_range_.has_value()
              ? absl::Substitute("pixel = pixel * float($0) + float($1);",
                                 (output_range_->second - output_range_->first),
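For reference, a hand-expanded version of the fragment-shader body built above, for the vertically flipped, [-1, 1]-normalized, single-channel case. This is illustrative only: the declarations of frame, sample_coordinate, and fragColor ($0 and the surrounding boilerplate) are elided, and it is written as a C++ raw string the way the calculator assembles its shader:

// Sketch only: what the substituted shader body looks like after $1 (flip
// coordinate) and $2 (normalization for an output range of [-1, 1]) are
// expanded by hand.
constexpr char kExpandedShaderBody[] = R"(
  void main() {
    vec2 coord = vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);
    vec4 pixel = texture2D(frame, coord);
    pixel = pixel * float(2) + float(-1);  // normalize [-1,1]
    fragColor.r = pixel.r;                 // r channel
  }
)";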
@@ -587,9 +587,21 @@ cc_library(
        "//mediapipe/framework/port:ret_check",
    ] + select({
        "//conditions:default": [
            "//mediapipe/framework/port:file_helpers",
        ],
    }),
        "//mediapipe:android": [],
    }) + select(
        {
            "//conditions:default": [
            ],
        },
    ) + select(
        {
            "//conditions:default": [
            ],
            "//mediapipe:android": [
            ],
        },
    ),
    alwayslink = 1,
)

@ -37,6 +37,7 @@ const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
|
|||
const char kImageTag[] = "IMAGE";
|
||||
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
|
||||
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
|
||||
const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";
|
||||
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
||||
const char kBBoxTag[] = "BBOX";
|
||||
const char kKeypointsTag[] = "KEYPOINTS";
|
||||
|
@ -153,6 +154,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
|
||||
cc->Inputs().Tag(tag).Set<std::vector<float>>();
|
||||
}
|
||||
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
|
||||
cc->Inputs().Tag(tag).Set<std::vector<std::string>>();
|
||||
}
|
||||
}
|
||||
|
||||
CHECK(cc->Outputs().HasTag(kSequenceExampleTag) ||
|
||||
|
@ -231,6 +235,13 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
mpms::ClearFeatureFloats(key, sequence_.get());
|
||||
mpms::ClearFeatureTimestamp(key, sequence_.get());
|
||||
}
|
||||
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
|
||||
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
|
||||
sizeof(*kBytesFeaturePrefixTag) -
|
||||
1);
|
||||
mpms::ClearFeatureBytes(key, sequence_.get());
|
||||
mpms::ClearFeatureTimestamp(key, sequence_.get());
|
||||
}
|
||||
if (absl::StartsWith(tag, kKeypointsTag)) {
|
||||
std::string key =
|
||||
tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
|
||||
|
@ -405,6 +416,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
|
|||
cc->Inputs().Tag(tag).Get<std::vector<float>>(),
|
||||
sequence_.get());
|
||||
}
|
||||
if (absl::StartsWith(tag, kBytesFeaturePrefixTag) &&
|
||||
!cc->Inputs().Tag(tag).IsEmpty()) {
|
||||
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
|
||||
sizeof(*kBytesFeaturePrefixTag) -
|
||||
1);
|
||||
mpms::AddFeatureTimestamp(key, cc->InputTimestamp().Value(),
|
||||
sequence_.get());
|
||||
mpms::AddFeatureBytes(
|
||||
key, cc->Inputs().Tag(tag).Get<std::vector<std::string>>(),
|
||||
sequence_.get());
|
||||
}
|
||||
if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) {
|
||||
std::string key = "";
|
||||
if (tag != kBBoxTag) {
|
||||
|
|
|
@ -49,6 +49,8 @@ constexpr char kKeypointsTestTag[] = "KEYPOINTS_TEST";
|
|||
constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED";
|
||||
constexpr char kAudioOtherTag[] = "AUDIO_OTHER";
|
||||
constexpr char kAudioTestTag[] = "AUDIO_TEST";
|
||||
constexpr char kBytesFeatureOtherTag[] = "BYTES_FEATURE_OTHER";
|
||||
constexpr char kBytesFeatureTestTag[] = "BYTES_FEATURE_TEST";
|
||||
constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
|
||||
constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER";
|
||||
constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST";
|
||||
|
@ -215,6 +217,54 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBytesLists) {
|
||||
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
|
||||
false, true);
|
||||
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||
|
||||
int num_timesteps = 2;
|
||||
for (int i = 0; i < num_timesteps; ++i) {
|
||||
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||
2, absl::StrCat("foo", 2 << i));
|
||||
runner_->MutableInputs()
|
||||
->Tag(kBytesFeatureTestTag)
|
||||
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
|
||||
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||
2, absl::StrCat("bar", 2 << i));
|
||||
runner_->MutableInputs()
|
||||
->Tag(kBytesFeatureOtherTag)
|
||||
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
|
||||
}
|
||||
|
||||
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||
Adopt(input_sequence.release());
|
||||
|
||||
MP_ASSERT_OK(runner_->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const tf::SequenceExample& output_sequence =
|
||||
output_packets[0].Get<tf::SequenceExample>();
|
||||
|
||||
ASSERT_EQ(num_timesteps,
|
||||
mpms::GetFeatureTimestampSize("TEST", output_sequence));
|
||||
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", output_sequence));
|
||||
ASSERT_EQ(num_timesteps,
|
||||
mpms::GetFeatureTimestampSize("OTHER", output_sequence));
|
||||
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", output_sequence));
|
||||
for (int i = 0; i < num_timesteps; ++i) {
|
||||
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("TEST", output_sequence, i));
|
||||
ASSERT_THAT(mpms::GetFeatureBytesAt("TEST", output_sequence, i),
|
||||
::testing::ElementsAreArray(
|
||||
std::vector<std::string>(2, absl::StrCat("foo", 2 << i))));
|
||||
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("OTHER", output_sequence, i));
|
||||
ASSERT_THAT(mpms::GetFeatureBytesAt("OTHER", output_sequence, i),
|
||||
::testing::ElementsAreArray(
|
||||
std::vector<std::string>(2, absl::StrCat("bar", 2 << i))));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) {
|
||||
SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true);
|
||||
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||
|
@ -829,6 +879,45 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFloatVectors) {
|
|||
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, TestReplacingBytesVectors) {
|
||||
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
|
||||
false, true);
|
||||
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||
|
||||
int num_timesteps = 2;
|
||||
for (int i = 0; i < num_timesteps; ++i) {
|
||||
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||
2, absl::StrCat("foo", 2 << i));
|
||||
mpms::AddFeatureBytes("TEST", *vs_ptr, input_sequence.get());
|
||||
mpms::AddFeatureTimestamp("TEST", i, input_sequence.get());
|
||||
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
|
||||
2, absl::StrCat("bar", 2 << i));
|
||||
mpms::AddFeatureBytes("OTHER", *vs_ptr, input_sequence.get());
|
||||
mpms::AddFeatureTimestamp("OTHER", i, input_sequence.get());
|
||||
}
|
||||
ASSERT_EQ(num_timesteps,
|
||||
mpms::GetFeatureTimestampSize("TEST", *input_sequence));
|
||||
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", *input_sequence));
|
||||
ASSERT_EQ(num_timesteps,
|
||||
mpms::GetFeatureTimestampSize("OTHER", *input_sequence));
|
||||
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", *input_sequence));
|
||||
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
|
||||
Adopt(input_sequence.release());
|
||||
|
||||
MP_ASSERT_OK(runner_->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner_->Outputs().Tag(kSequenceExampleTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const tf::SequenceExample& output_sequence =
|
||||
output_packets[0].Get<tf::SequenceExample>();
|
||||
|
||||
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("TEST", output_sequence));
|
||||
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("TEST", output_sequence));
|
||||
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("OTHER", output_sequence));
|
||||
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
|
||||
}
|
||||
|
||||
TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
|
||||
SetUpCalculator({"IMAGE:images"}, {}, false, true);
|
||||
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
|
||||
|
|
|
@@ -162,6 +162,27 @@ selects.config_setting_group(
    ],
)

config_setting(
    name = "edge_tpu_usb",
    define_values = {
        "MEDIAPIPE_EDGE_TPU": "usb",
    },
)

config_setting(
    name = "edge_tpu_pci",
    define_values = {
        "MEDIAPIPE_EDGE_TPU": "pci",
    },
)

config_setting(
    name = "edge_tpu_all",
    define_values = {
        "MEDIAPIPE_EDGE_TPU": "all",
    },
)

cc_library(
    name = "tflite_inference_calculator",
    srcs = ["tflite_inference_calculator.cc"],

@@ -172,6 +193,12 @@ cc_library(
        ],
        "//conditions:default": [],
    }),
    defines = select({
        "//conditions:default": [],
        ":edge_tpu_usb": ["MEDIAPIPE_EDGE_TPU=usb"],
        ":edge_tpu_pci": ["MEDIAPIPE_EDGE_TPU=pci"],
        ":edge_tpu_all": ["MEDIAPIPE_EDGE_TPU=all"],
    }),
    linkopts = select({
        "//mediapipe:ios": [
            "-framework CoreVideo",

@@ -223,6 +250,20 @@ cc_library(
        "//conditions:default": [
            "//mediapipe/util:cpu_util",
        ],
    }) + select({
        "//conditions:default": [],
        ":edge_tpu_usb": [
            "@libedgetpu//tflite/public:edgetpu",
            "@libedgetpu//tflite/public:oss_edgetpu_direct_usb",
        ],
        ":edge_tpu_pci": [
            "@libedgetpu//tflite/public:edgetpu",
            "@libedgetpu//tflite/public:oss_edgetpu_direct_pci",
        ],
        ":edge_tpu_all": [
            "@libedgetpu//tflite/public:edgetpu",
            "@libedgetpu//tflite/public:oss_edgetpu_direct_all",
        ],
    }),
    alwayslink = 1,
)

@@ -85,7 +85,22 @@ constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
}  // namespace

#if defined(MEDIAPIPE_EDGE_TPU)
#include "edgetpu.h"
#include "tflite/public/edgetpu.h"

// Checks whether the model contains an Edge TPU custom op or not.
bool ContainsEdgeTpuCustomOp(const tflite::FlatBufferModel& model) {
  const auto* opcodes = model.GetModel()->operator_codes();
  for (const auto* subgraph : *model.GetModel()->subgraphs()) {
    for (const auto* op : *subgraph->operators()) {
      const auto* opcode = opcodes->Get(op->opcode_index());
      if (opcode->custom_code() &&
          opcode->custom_code()->str() == edgetpu::kCustomOp) {
        return true;
      }
    }
  }
  return false;
}

// Creates and returns an Edge TPU interpreter to run the given edgetpu model.
std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(

@@ -94,14 +109,9 @@ std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
    edgetpu::EdgeTpuContext* edgetpu_context) {
  resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (tflite::InterpreterBuilder(model, *resolver)(&interpreter) != kTfLiteOk) {
    std::cerr << "Failed to build edge TPU interpreter." << std::endl;
  }
  CHECK_EQ(tflite::InterpreterBuilder(model, *resolver)(&interpreter),
           kTfLiteOk);
  interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
  interpreter->SetNumThreads(1);
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    std::cerr << "Failed to allocate edge TPU tensors." << std::endl;
  }
  return interpreter;
}
#endif  // MEDIAPIPE_EDGE_TPU
@ -279,8 +289,7 @@ class TfLiteInferenceCalculator : public CalculatorBase {
|
|||
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
|
||||
|
||||
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_ =
|
||||
edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
|
||||
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_;
|
||||
#endif
|
||||
|
||||
bool gpu_inference_ = false;
|
||||
|
@ -303,6 +312,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
|
|||
// Calculator Core Section
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kCustomOpResolverTag[] = "CUSTOM_OP_RESOLVER";
|
||||
constexpr char kModelTag[] = "MODEL";
|
||||
|
||||
template <class CC>
|
||||
bool ShouldUseGpu(CC* cc) {
|
||||
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
||||
|
@ -327,7 +340,7 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
|
|||
const auto& options =
|
||||
cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>();
|
||||
RET_CHECK(!options.model_path().empty() ^
|
||||
cc->InputSidePackets().HasTag("MODEL"))
|
||||
cc->InputSidePackets().HasTag(kModelTag))
|
||||
<< "Either model as side packet or model path in options is required.";
|
||||
|
||||
if (cc->Inputs().HasTag(kTensorsTag))
|
||||
|
@ -340,13 +353,13 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
|
|||
if (cc->Outputs().HasTag(kTensorsGpuTag))
|
||||
cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
|
||||
|
||||
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
|
||||
if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
|
||||
cc->InputSidePackets()
|
||||
.Tag("CUSTOM_OP_RESOLVER")
|
||||
.Tag(kCustomOpResolverTag)
|
||||
.Set<tflite::ops::builtin::BuiltinOpResolver>();
|
||||
}
|
||||
if (cc->InputSidePackets().HasTag("MODEL")) {
|
||||
cc->InputSidePackets().Tag("MODEL").Set<TfLiteModelPtr>();
|
||||
if (cc->InputSidePackets().HasTag(kModelTag)) {
|
||||
cc->InputSidePackets().Tag(kModelTag).Set<TfLiteModelPtr>();
|
||||
}
|
||||
|
||||
if (ShouldUseGpu(cc)) {
|
||||
|
@ -486,8 +499,8 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) {
|
|||
MP_RETURN_IF_ERROR(WriteKernelsToFile());
|
||||
|
||||
return RunInContextIfNeeded([this]() -> absl::Status {
|
||||
interpreter_ = nullptr;
|
||||
if (delegate_) {
|
||||
interpreter_ = nullptr;
|
||||
delegate_ = nullptr;
|
||||
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
||||
if (gpu_inference_) {
|
||||
|
@ -501,7 +514,7 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) {
|
|||
#endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED
|
||||
}
|
||||
#if defined(MEDIAPIPE_EDGE_TPU)
|
||||
edgetpu_context_.reset();
|
||||
edgetpu_context_ = nullptr;
|
||||
#endif
|
||||
return absl::OkStatus();
|
||||
});
|
||||
|
@ -723,9 +736,9 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
|
|||
auto op_resolver_ptr =
|
||||
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
|
||||
&default_op_resolver);
|
||||
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
|
||||
if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
|
||||
op_resolver_ptr = &(cc->InputSidePackets()
|
||||
.Tag("CUSTOM_OP_RESOLVER")
|
||||
.Tag(kCustomOpResolverTag)
|
||||
.Get<tflite::ops::builtin::BuiltinOpResolver>());
|
||||
}
|
||||
|
||||
|
@@ -825,21 +838,26 @@ absl::Status TfLiteInferenceCalculator::LoadModel(CalculatorContext* cc) {

  tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates
      default_op_resolver;
  auto op_resolver_ptr =
      static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
          &default_op_resolver);

  if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
    op_resolver_ptr = &(cc->InputSidePackets()
                            .Tag("CUSTOM_OP_RESOLVER")
                            .Get<tflite::ops::builtin::BuiltinOpResolver>());
  }

#if defined(MEDIAPIPE_EDGE_TPU)
  interpreter_ =
      BuildEdgeTpuInterpreter(model, op_resolver_ptr, edgetpu_context_.get());
#else
  tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_);
  if (ContainsEdgeTpuCustomOp(model)) {
    edgetpu_context_ = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
    interpreter_ = BuildEdgeTpuInterpreter(model, &default_op_resolver,
                                           edgetpu_context_.get());
  } else {
#endif  // MEDIAPIPE_EDGE_TPU
    auto op_resolver_ptr =
        static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
            &default_op_resolver);

    if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
      op_resolver_ptr = &(cc->InputSidePackets()
                              .Tag(kCustomOpResolverTag)
                              .Get<tflite::ops::builtin::BuiltinOpResolver>());
    }

    tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_);
#if defined(MEDIAPIPE_EDGE_TPU)
  }
#endif  // MEDIAPIPE_EDGE_TPU

  RET_CHECK(interpreter_);
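A minimal sketch, not part of this change, of how the runtime Edge TPU selection above fits together outside the calculator. model_path and the function name are illustrative, the resolver parameter type is assumed from the surrounding hunks, and the same headers the calculator uses (TFLite and tflite/public/edgetpu.h) are assumed to be included:

// Sketch only: open the Edge TPU device and build an Edge TPU interpreter
// only when the model actually contains the Edge TPU custom op; otherwise
// build a plain CPU interpreter.
std::unique_ptr<tflite::Interpreter> MakeInterpreterForModel(
    const std::string& model_path,
    tflite::ops::builtin::BuiltinOpResolver* resolver,
    std::shared_ptr<edgetpu::EdgeTpuContext>* edgetpu_context) {
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (ContainsEdgeTpuCustomOp(*model)) {
    *edgetpu_context = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
    interpreter =
        BuildEdgeTpuInterpreter(*model, resolver, edgetpu_context->get());
  } else {
    tflite::InterpreterBuilder(*model, *resolver)(&interpreter);
  }
  return interpreter;
}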
@ -872,8 +890,8 @@ absl::StatusOr<Packet> TfLiteInferenceCalculator::GetModelAsPacket(
|
|||
if (!options.model_path().empty()) {
|
||||
return TfLiteModelLoader::LoadFromPath(options.model_path());
|
||||
}
|
||||
if (cc.InputSidePackets().HasTag("MODEL")) {
|
||||
return cc.InputSidePackets().Tag("MODEL");
|
||||
if (cc.InputSidePackets().HasTag(kModelTag)) {
|
||||
return cc.InputSidePackets().Tag(kModelTag);
|
||||
}
|
||||
return absl::Status(absl::StatusCode::kNotFound,
|
||||
"Must specify TFLite model as path or loaded model.");
|
||||
|
@ -929,6 +947,8 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
|
|||
kTfLiteOk);
|
||||
return absl::OkStatus();
|
||||
}
|
||||
#else
|
||||
(void)use_xnnpack;
|
||||
#endif // !EDGETPU
|
||||
|
||||
// Return and use default tflite infernece (on CPU). No need for GPU
|
||||
|
|
|
@ -1353,3 +1353,34 @@ cc_test(
|
|||
"//mediapipe/framework/port:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "inverse_matrix_calculator",
|
||||
srcs = ["inverse_matrix_calculator.cc"],
|
||||
hdrs = ["inverse_matrix_calculator.h"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/api2:node",
|
||||
"//mediapipe/framework/api2:port",
|
||||
"@com_google_absl//absl/status",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
alwayslink = True,
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "inverse_matrix_calculator_test",
|
||||
srcs = ["inverse_matrix_calculator_test.cc"],
|
||||
tags = ["desktop_only_test"],
|
||||
deps = [
|
||||
":inverse_matrix_calculator",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_runner",
|
||||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/framework/port:integral_types",
|
||||
"//mediapipe/framework/port:parse_text_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -33,6 +33,7 @@ namespace {
|
|||
constexpr char kImageFrameTag[] = "IMAGE_CPU";
|
||||
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
|
||||
constexpr char kImageTag[] = "IMAGE";
|
||||
constexpr char kSourceOnGpuTag[] = "SOURCE_ON_GPU";
|
||||
} // namespace
|
||||
|
||||
// A calculator for converting the unified image container into
|
||||
|
@ -46,6 +47,8 @@ constexpr char kImageTag[] = "IMAGE";
|
|||
// IMAGE_CPU: An ImageFrame containing output image.
|
||||
// IMAGE_GPU: A GpuBuffer containing output image.
|
||||
//
|
||||
// SOURCE_ON_GPU: The source Image is stored on GPU or CPU.
|
||||
//
|
||||
// Note:
|
||||
// Data is automatically transferred to/from the CPU or GPU
|
||||
// depending on output type.
|
||||
|
@ -66,6 +69,7 @@ class FromImageCalculator : public CalculatorBase {
|
|||
absl::Status RenderGpu(CalculatorContext* cc);
|
||||
absl::Status RenderCpu(CalculatorContext* cc);
|
||||
|
||||
bool check_image_source_ = false;
|
||||
bool gpu_output_ = false;
|
||||
bool gpu_initialized_ = false;
|
||||
#if !MEDIAPIPE_DISABLE_GPU
|
||||
|
@ -102,6 +106,9 @@ absl::Status FromImageCalculator::GetContract(CalculatorContract* cc) {
|
|||
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kSourceOnGpuTag)) {
|
||||
cc->Outputs().Tag(kSourceOnGpuTag).Set<bool>();
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -111,7 +118,9 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) {
|
|||
if (cc->Outputs().HasTag(kGpuBufferTag)) {
|
||||
gpu_output_ = true;
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kSourceOnGpuTag)) {
|
||||
check_image_source_ = true;
|
||||
}
|
||||
if (gpu_output_) {
|
||||
#if !MEDIAPIPE_DISABLE_GPU
|
||||
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
|
||||
|
@ -122,6 +131,13 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) {
|
|||
}
|
||||
|
||||
absl::Status FromImageCalculator::Process(CalculatorContext* cc) {
|
||||
if (check_image_source_) {
|
||||
auto& input = cc->Inputs().Tag(kImageTag).Get<mediapipe::Image>();
|
||||
cc->Outputs()
|
||||
.Tag(kSourceOnGpuTag)
|
||||
.AddPacket(MakePacket<bool>(input.UsesGpu()).At(cc->InputTimestamp()));
|
||||
}
|
||||
|
||||
if (gpu_output_) {
|
||||
#if !MEDIAPIPE_DISABLE_GPU
|
||||
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([&cc]() -> absl::Status {
|
||||
|
|
50  mediapipe/calculators/util/inverse_matrix_calculator.cc  Normal file
@@ -0,0 +1,50 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/util/inverse_matrix_calculator.h"

#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/status/status.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {
namespace api2 {

class InverseMatrixCalculatorImpl : public NodeImpl<InverseMatrixCalculator> {
  absl::Status Process(mediapipe::CalculatorContext* cc) override {
    if (kInputMatrix(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> matrix(
        kInputMatrix(cc).Get().data());

    Eigen::Matrix<float, 4, 4, Eigen::RowMajor> inverse_matrix;
    bool inverse_check;
    matrix.computeInverseWithCheck(inverse_matrix, inverse_check);
    RET_CHECK(inverse_check) << "Inverse matrix cannot be calculated.";

    std::array<float, 16> output;
    Eigen::Map<Eigen::Matrix<float, 4, 4, Eigen::RowMajor>>(
        output.data(), 4, 4) = inverse_matrix.matrix();
    kOutputMatrix(cc).Send(std::move(output));
    return absl::OkStatus();
  }
};
MEDIAPIPE_NODE_IMPLEMENTATION(InverseMatrixCalculatorImpl);

}  // namespace api2
}  // namespace mediapipe
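A small sketch, not part of this change, of the kind of sanity check the RET_CHECK above guards against: the computed inverse, multiplied back onto the input, should be numerically close to identity. The helper name and tolerance are illustrative:

#include <array>
#include "Eigen/Core"

// Sketch only: verify that a matrix and its claimed inverse multiply back to
// (approximately) the identity, e.g. in a unit test around this calculator.
bool IsInversePair(const std::array<float, 16>& m,
                   const std::array<float, 16>& m_inv,
                   float tolerance = 1e-5f) {
  Eigen::Matrix<float, 4, 4, Eigen::RowMajor> a(m.data());
  Eigen::Matrix<float, 4, 4, Eigen::RowMajor> b(m_inv.data());
  return (a * b).isIdentity(tolerance);
}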
51  mediapipe/calculators/util/inverse_matrix_calculator.h  Normal file
@@ -0,0 +1,51 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_

#include <array>

#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"

namespace mediapipe {

// Computes the inverse of a 4x4 matrix.
//
// Input:
//   MATRIX - std::array<float, 16>
//     Row-major 4x4 matrix to invert.
//
// Output:
//   MATRIX - std::array<float, 16>
//     Row-major 4x4 inverted matrix.
//
// Usage example:
//   node {
//     calculator: "InverseMatrixCalculator"
//     input_stream: "MATRIX:input_matrix"
//     output_stream: "MATRIX:output_matrix"
//   }
class InverseMatrixCalculator : public mediapipe::api2::NodeIntf {
 public:
  static constexpr mediapipe::api2::Input<std::array<float, 16>> kInputMatrix{
      "MATRIX"};
  static constexpr mediapipe::api2::Output<std::array<float, 16>> kOutputMatrix{
      "MATRIX"};
  MEDIAPIPE_NODE_INTERFACE(InverseMatrixCalculator, kInputMatrix,
                           kOutputMatrix);
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_
126  mediapipe/calculators/util/inverse_matrix_calculator_test.cc  Normal file
@@ -0,0 +1,126 @@
#include "mediapipe/calculators/util/inverse_matrix_calculator.h"

#include <array>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {
namespace {

void RunTest(const std::array<float, 16>& matrix,
             const std::array<float, 16>& expected_inverse_matrix) {
  auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
      R"pb(
        input_stream: "matrix"
        node {
          calculator: "InverseMatrixCalculator"
          input_stream: "MATRIX:matrix"
          output_stream: "MATRIX:inverse_matrix"
        }
      )pb");

  std::vector<Packet> output_packets;
  tool::AddVectorSink("inverse_matrix", &graph_config, &output_packets);

  // Run the graph.
  CalculatorGraph graph;
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));

  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "matrix",
      MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));

  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_THAT(output_packets, testing::SizeIs(1));

  const auto& inverse_matrix = output_packets[0].Get<std::array<float, 16>>();

  EXPECT_THAT(inverse_matrix, testing::Eq(expected_inverse_matrix));

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("matrix"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

TEST(InverseMatrixCalculatorTest, Identity) {
  // clang-format off
  std::array<float, 16> matrix = {
      1.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      1.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

TEST(InverseMatrixCalculatorTest, Translation) {
  // clang-format off
  std::array<float, 16> matrix = {
      1.0f, 0.0f, 0.0f, 2.0f,
      0.0f, 1.0f, 0.0f, -5.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      1.0f, 0.0f, 0.0f, -2.0f,
      0.0f, 1.0f, 0.0f, 5.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

TEST(InverseMatrixCalculatorTest, Scale) {
  // clang-format off
  std::array<float, 16> matrix = {
      5.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 2.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      0.2f, 0.0f, 0.0f, 0.0f,
      0.0f, 0.5f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

TEST(InverseMatrixCalculatorTest, Rotation90) {
  // clang-format off
  std::array<float, 16> matrix = {
      0.0f, -1.0f, 0.0f, 0.0f,
      1.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  std::array<float, 16> expected_inverse_matrix = {
      0.0f, 1.0f, 0.0f, 0.0f,
      -1.0f, 0.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 1.0f,
  };
  // clang-format on
  RunTest(matrix, expected_inverse_matrix);
}

}  // namespace
}  // namespace mediapipe
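For intuition, the Scale case above follows from the fact that the inverse of a diagonal matrix is the diagonal of reciprocals. A minimal standalone sketch reproducing the expected values with Eigen directly (not part of this commit; it assumes only the Eigen dependency the calculator already uses):

#include <iostream>

#include "Eigen/Dense"

int main() {
  // diag(5, 2, 1, 1): its inverse is diag(1/5, 1/2, 1, 1) = diag(0.2, 0.5, 1, 1),
  // matching expected_inverse_matrix in the Scale test.
  Eigen::Matrix4f m = Eigen::Vector4f(5.0f, 2.0f, 1.0f, 1.0f).asDiagonal();
  Eigen::Matrix4f inverse = m.inverse();
  std::cout << inverse << std::endl;
  return 0;
}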
16  mediapipe/examples/android/solutions/create_win_symlinks.bat  Normal file
@@ -0,0 +1,16 @@
@rem Remove the current res dir symlinks that are for Linux and macOS and recreate res dir symlinks for Windows.
@rem This script needs administrator permission. Must run this script as administrator.

@rem for hands example app.
cd /d %~dp0
cd hands\src\main
rm res
mklink /d res ..\..\..\res

@rem for facemesh example app.
cd /d %~dp0
cd facemesh\src\main
rm res
mklink /d res ..\..\..\res
dir
pause
50  mediapipe/examples/android/solutions/facemesh/build.gradle  Normal file
@@ -0,0 +1,50 @@
plugins {
    id 'com.android.application'
}

android {
    compileSdkVersion 30
    buildToolsVersion "30.0.3"

    defaultConfig {
        applicationId "com.google.mediapipe.examples.facemesh"
        minSdkVersion 21
        targetSdkVersion 30
        versionCode 1
        versionName "1.0"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
}

dependencies {
    implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar'])
    implementation 'androidx.appcompat:appcompat:1.3.0'
    implementation 'com.google.android.material:material:1.3.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
    testImplementation 'junit:junit:4.+'
    androidTestImplementation 'androidx.test.ext:junit:1.1.2'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
    // MediaPipe FaceMesh solution API and solution-core.
    implementation 'com.google.mediapipe:solution-core:latest.release'
    implementation 'com.google.mediapipe:facemesh:latest.release'
    // MediaPipe deps
    implementation 'com.google.flogger:flogger:latest.release'
    implementation 'com.google.flogger:flogger-system-backend:latest.release'
    implementation 'com.google.guava:guava:27.0.1-android'
    implementation 'com.google.protobuf:protobuf-java:3.11.4'
    // CameraX core library
    def camerax_version = "1.0.0-beta10"
    implementation "androidx.camera:camera-core:$camerax_version"
    implementation "androidx.camera:camera-camera2:$camerax_version"
    implementation "androidx.camera:camera-lifecycle:$camerax_version"
}
21  mediapipe/examples/android/solutions/facemesh/proguard-rules.pro  vendored  Normal file
|
@ -0,0 +1,21 @@
|
|||
# Add project specific ProGuard rules here.
|
||||
# You can control the set of applied configuration files using the
|
||||
# proguardFiles setting in build.gradle.
|
||||
#
|
||||
# For more details, see
|
||||
# http://developer.android.com/guide/developing/tools/proguard.html
|
||||
|
||||
# If your project uses WebView with JS, uncomment the following
|
||||
# and specify the fully qualified class name to the JavaScript interface
|
||||
# class:
|
||||
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
|
||||
# public *;
|
||||
#}
|
||||
|
||||
# Uncomment this to preserve the line number information for
|
||||
# debugging stack traces.
|
||||
#-keepattributes SourceFile,LineNumberTable
|
||||
|
||||
# If you keep the line number information, uncomment this to
|
||||
# hide the original source file name.
|
||||
#-renamesourcefileattribute SourceFile
|
|
@ -0,0 +1,32 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="com.google.mediapipe.examples.facemesh">
|
||||
|
||||
<uses-sdk
|
||||
android:minSdkVersion="21"
|
||||
android:targetSdkVersion="30" />
|
||||
|
||||
<!-- For loading images from gallery -->
|
||||
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
|
||||
<!-- For using the camera -->
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-feature android:name="android.hardware.camera" />
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="MediaPipe FaceMesh"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<activity android:name=".MainActivity"
|
||||
android:screenOrientation="portrait">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
44  mediapipe/examples/android/solutions/facemesh/src/main/BUILD  Normal file
|
@ -0,0 +1,44 @@
|
|||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:private"])
|
||||
|
||||
android_binary(
|
||||
name = "facemesh",
|
||||
srcs = glob(["**/*.java"]),
|
||||
custom_package = "com.google.mediapipe.examples.facemesh",
|
||||
manifest = "AndroidManifest.xml",
|
||||
manifest_values = {
|
||||
"applicationId": "com.google.mediapipe.examples.facemesh",
|
||||
},
|
||||
multidex = "native",
|
||||
resource_files = ["//mediapipe/examples/android/solutions:resource_files"],
|
||||
deps = [
|
||||
"//mediapipe/framework/formats:landmark_java_proto_lite",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
|
||||
"//mediapipe/java/com/google/mediapipe/solutions/facemesh",
|
||||
"//third_party:androidx_appcompat",
|
||||
"//third_party:androidx_constraint_layout",
|
||||
"//third_party:opencv",
|
||||
"@maven//:androidx_activity_activity",
|
||||
"@maven//:androidx_concurrent_concurrent_futures",
|
||||
"@maven//:androidx_fragment_fragment",
|
||||
"@maven//:com_google_guava_guava",
|
||||
],
|
||||
)
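As a usage sketch (assuming the standard MediaPipe Android build flags defined in the repository's .bazelrc), this target could be built with something like: bazel build -c opt --config=android_arm64 //mediapipe/examples/android/solutions/facemesh/src/main:facemesh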
|
|
@ -0,0 +1,186 @@
|
|||
// Copyright 2021 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.mediapipe.examples.facemesh;
|
||||
|
||||
import android.opengl.GLES20;
|
||||
import android.opengl.Matrix;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutioncore.ResultGlBoundary;
|
||||
import com.google.mediapipe.solutioncore.ResultGlRenderer;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.util.List;
|
||||
|
||||
/** A custom implementation of {@link ResultGlRenderer} to render MediaPipe FaceMesh results. */
|
||||
public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult> {
|
||||
private static final String TAG = "FaceMeshResultGlRenderer";
|
||||
|
||||
private static final float[] TESSELATION_COLOR = new float[] {0.75f, 0.75f, 0.75f, 0.5f};
|
||||
private static final int TESSELATION_THICKNESS = 5;
|
||||
private static final float[] RIGHT_EYE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final int RIGHT_EYE_THICKNESS = 8;
|
||||
private static final float[] RIGHT_EYEBROW_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final int RIGHT_EYEBROW_THICKNESS = 8;
|
||||
private static final float[] LEFT_EYE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final int LEFT_EYE_THICKNESS = 8;
|
||||
private static final float[] LEFT_EYEBROW_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final int LEFT_EYEBROW_THICKNESS = 8;
|
||||
private static final float[] FACE_OVAL_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
|
||||
private static final int FACE_OVAL_THICKNESS = 8;
|
||||
private static final float[] LIPS_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
|
||||
private static final int LIPS_THICKNESS = 8;
|
||||
private static final String VERTEX_SHADER =
|
||||
"uniform mat4 uTransformMatrix;\n"
|
||||
+ "attribute vec4 vPosition;\n"
|
||||
+ "void main() {\n"
|
||||
+ " gl_Position = uTransformMatrix * vPosition;\n"
|
||||
+ "}";
|
||||
private static final String FRAGMENT_SHADER =
|
||||
"precision mediump float;\n"
|
||||
+ "uniform vec4 uColor;\n"
|
||||
+ "void main() {\n"
|
||||
+ " gl_FragColor = uColor;\n"
|
||||
+ "}";
|
||||
private int program;
|
||||
private int positionHandle;
|
||||
private int transformMatrixHandle;
|
||||
private int colorHandle;
|
||||
private final float[] transformMatrix = new float[16];
|
||||
|
||||
private int loadShader(int type, String shaderCode) {
|
||||
int shader = GLES20.glCreateShader(type);
|
||||
GLES20.glShaderSource(shader, shaderCode);
|
||||
GLES20.glCompileShader(shader);
|
||||
return shader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setupRendering() {
|
||||
program = GLES20.glCreateProgram();
|
||||
int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER);
|
||||
int fragmentShader = loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER);
|
||||
GLES20.glAttachShader(program, vertexShader);
|
||||
GLES20.glAttachShader(program, fragmentShader);
|
||||
GLES20.glLinkProgram(program);
|
||||
positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
|
||||
transformMatrixHandle = GLES20.glGetUniformLocation(program, "uTransformMatrix");
|
||||
colorHandle = GLES20.glGetUniformLocation(program, "uColor");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void renderResult(FaceMeshResult result, ResultGlBoundary boundary) {
|
||||
if (result == null) {
|
||||
return;
|
||||
}
|
||||
GLES20.glUseProgram(program);
|
||||
// Sets the transform matrix to align the result rendering with the scaled output texture.
|
||||
// Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
|
||||
// bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
|
||||
// top-left corner.
|
||||
Matrix.setIdentityM(transformMatrix, 0);
|
||||
Matrix.scaleM(
|
||||
transformMatrix,
|
||||
0,
|
||||
2 / (boundary.right() - boundary.left()),
|
||||
-2 / (boundary.top() - boundary.bottom()),
|
||||
1.0f);
|
||||
GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
|
||||
|
||||
int numFaces = result.multiFaceLandmarks().size();
|
||||
for (int i = 0; i < numFaces; ++i) {
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_TESSELATION,
|
||||
TESSELATION_COLOR,
|
||||
TESSELATION_THICKNESS);
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_RIGHT_EYE,
|
||||
RIGHT_EYE_COLOR,
|
||||
RIGHT_EYE_THICKNESS);
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_RIGHT_EYEBROW,
|
||||
RIGHT_EYEBROW_COLOR,
|
||||
RIGHT_EYEBROW_THICKNESS);
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_LEFT_EYE,
|
||||
LEFT_EYE_COLOR,
|
||||
LEFT_EYE_THICKNESS);
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
|
||||
LEFT_EYEBROW_COLOR,
|
||||
LEFT_EYEBROW_THICKNESS);
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_FACE_OVAL,
|
||||
FACE_OVAL_COLOR,
|
||||
FACE_OVAL_THICKNESS);
|
||||
drawLandmarks(
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_LIPS,
|
||||
LIPS_COLOR,
|
||||
LIPS_THICKNESS);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this to delete the shader program.
|
||||
*
|
||||
* <p>This is only necessary if one wants to release the program while keeping the context around.
|
||||
*/
|
||||
public void release() {
|
||||
GLES20.glDeleteProgram(program);
|
||||
}
|
||||
|
||||
private void drawLandmarks(
|
||||
List<NormalizedLandmark> faceLandmarkList,
|
||||
ImmutableSet<FaceMeshConnections.Connection> connections,
|
||||
float[] colorArray,
|
||||
int thickness) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
GLES20.glLineWidth(thickness);
|
||||
for (FaceMeshConnections.Connection c : connections) {
|
||||
float[] vertex = new float[4];
|
||||
NormalizedLandmark start = faceLandmarkList.get(c.start());
|
||||
vertex[0] = normalizedLandmarkValue(start.getX());
|
||||
vertex[1] = normalizedLandmarkValue(start.getY());
|
||||
NormalizedLandmark end = faceLandmarkList.get(c.end());
|
||||
vertex[2] = normalizedLandmarkValue(end.getX());
|
||||
vertex[3] = normalizedLandmarkValue(end.getY());
|
||||
FloatBuffer vertexBuffer =
|
||||
ByteBuffer.allocateDirect(vertex.length * 4)
|
||||
.order(ByteOrder.nativeOrder())
|
||||
.asFloatBuffer()
|
||||
.put(vertex);
|
||||
vertexBuffer.position(0);
|
||||
GLES20.glEnableVertexAttribArray(positionHandle);
|
||||
GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertexBuffer);
|
||||
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
|
||||
}
|
||||
}
|
||||
|
||||
// Normalizes the value from the landmark value range:[0, 1] to the standard OpenGL coordinate
|
||||
// value range: [-1, 1].
|
||||
private float normalizedLandmarkValue(float value) {
|
||||
return value * 2 - 1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,158 @@
|
|||
// Copyright 2021 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.mediapipe.examples.facemesh;
|
||||
|
||||
import android.content.Context;
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.Canvas;
|
||||
import android.graphics.Color;
|
||||
import android.graphics.Matrix;
|
||||
import android.graphics.Paint;
|
||||
import androidx.appcompat.widget.AppCompatImageView;
|
||||
import android.util.Size;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
|
||||
import java.util.List;
|
||||
|
||||
/** An ImageView implementation for displaying MediaPipe FaceMesh results. */
|
||||
public class FaceMeshResultImageView extends AppCompatImageView {
|
||||
private static final String TAG = "FaceMeshResultImageView";
|
||||
|
||||
private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
|
||||
private static final int TESSELATION_THICKNESS = 5;
|
||||
private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_EYE_THICKNESS = 8;
|
||||
private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_EYEBROW_THICKNESS = 8;
|
||||
private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LEFT_EYE_THICKNESS = 8;
|
||||
private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LEFT_EYEBROW_THICKNESS = 8;
|
||||
private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
|
||||
private static final int FACE_OVAL_THICKNESS = 8;
|
||||
private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
|
||||
private static final int LIPS_THICKNESS = 8;
|
||||
private Bitmap latest;
|
||||
|
||||
public FaceMeshResultImageView(Context context) {
|
||||
super(context);
|
||||
setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a {@link FaceMeshResult} to render.
|
||||
*
|
||||
* @param result a {@link FaceMeshResult} object that contains the solution outputs and the input
|
||||
* {@link Bitmap}.
|
||||
*/
|
||||
public void setFaceMeshResult(FaceMeshResult result) {
|
||||
if (result == null) {
|
||||
return;
|
||||
}
|
||||
Bitmap bmInput = result.inputBitmap();
|
||||
int width = bmInput.getWidth();
|
||||
int height = bmInput.getHeight();
|
||||
latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
|
||||
Canvas canvas = new Canvas(latest);
|
||||
Size imageSize = new Size(width, height);
|
||||
canvas.drawBitmap(bmInput, new Matrix(), null);
|
||||
int numFaces = result.multiFaceLandmarks().size();
|
||||
for (int i = 0; i < numFaces; ++i) {
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_TESSELATION,
|
||||
imageSize,
|
||||
TESSELATION_COLOR,
|
||||
TESSELATION_THICKNESS);
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_RIGHT_EYE,
|
||||
imageSize,
|
||||
RIGHT_EYE_COLOR,
|
||||
RIGHT_EYE_THICKNESS);
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_RIGHT_EYEBROW,
|
||||
imageSize,
|
||||
RIGHT_EYEBROW_COLOR,
|
||||
RIGHT_EYEBROW_THICKNESS);
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_LEFT_EYE,
|
||||
imageSize,
|
||||
LEFT_EYE_COLOR,
|
||||
LEFT_EYE_THICKNESS);
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
|
||||
imageSize,
|
||||
LEFT_EYEBROW_COLOR,
|
||||
LEFT_EYEBROW_THICKNESS);
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_FACE_OVAL,
|
||||
imageSize,
|
||||
FACE_OVAL_COLOR,
|
||||
FACE_OVAL_THICKNESS);
|
||||
drawLandmarksOnCanvas(
|
||||
canvas,
|
||||
result.multiFaceLandmarks().get(i).getLandmarkList(),
|
||||
FaceMeshConnections.FACEMESH_LIPS,
|
||||
imageSize,
|
||||
LIPS_COLOR,
|
||||
LIPS_THICKNESS);
|
||||
}
|
||||
}
|
||||
|
||||
/** Updates the image view with the latest facemesh result. */
|
||||
public void update() {
|
||||
postInvalidate();
|
||||
if (latest != null) {
|
||||
setImageBitmap(latest);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Better face landmark and face connection drawing.
|
||||
private void drawLandmarksOnCanvas(
|
||||
Canvas canvas,
|
||||
List<NormalizedLandmark> faceLandmarkList,
|
||||
ImmutableSet<FaceMeshConnections.Connection> connections,
|
||||
Size imageSize,
|
||||
int color,
|
||||
int thickness) {
|
||||
// Draw connections.
|
||||
for (FaceMeshConnections.Connection c : connections) {
|
||||
Paint connectionPaint = new Paint();
|
||||
connectionPaint.setColor(color);
|
||||
connectionPaint.setStrokeWidth(thickness);
|
||||
NormalizedLandmark start = faceLandmarkList.get(c.start());
|
||||
NormalizedLandmark end = faceLandmarkList.get(c.end());
|
||||
canvas.drawLine(
|
||||
start.getX() * imageSize.getWidth(),
|
||||
start.getY() * imageSize.getHeight(),
|
||||
end.getX() * imageSize.getWidth(),
|
||||
end.getY() * imageSize.getHeight(),
|
||||
connectionPaint);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,308 @@
|
|||
// Copyright 2021 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.google.mediapipe.examples.facemesh;
|
||||
|
||||
import android.content.Intent;
|
||||
import android.graphics.Bitmap;
|
||||
import android.os.Bundle;
|
||||
import android.provider.MediaStore;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
import android.util.Log;
|
||||
import android.view.View;
|
||||
import android.widget.Button;
|
||||
import android.widget.FrameLayout;
|
||||
import androidx.activity.result.ActivityResultLauncher;
|
||||
import androidx.activity.result.contract.ActivityResultContracts;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutioncore.CameraInput;
|
||||
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
|
||||
import com.google.mediapipe.solutioncore.VideoInput;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMesh;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMeshOptions;
|
||||
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
|
||||
import java.io.IOException;
|
||||
|
||||
/** Main activity of MediaPipe FaceMesh app. */
|
||||
public class MainActivity extends AppCompatActivity {
|
||||
private static final String TAG = "MainActivity";
|
||||
|
||||
private FaceMesh facemesh;
|
||||
// Run the pipeline and the model inference on GPU or CPU.
|
||||
private static final boolean RUN_ON_GPU = true;
|
||||
|
||||
private enum InputSource {
|
||||
UNKNOWN,
|
||||
IMAGE,
|
||||
VIDEO,
|
||||
CAMERA,
|
||||
}
|
||||
private InputSource inputSource = InputSource.UNKNOWN;
|
||||
// Image demo UI and image loader components.
|
||||
private ActivityResultLauncher<Intent> imageGetter;
|
||||
private FaceMeshResultImageView imageView;
|
||||
// Video demo UI and video loader components.
|
||||
private VideoInput videoInput;
|
||||
private ActivityResultLauncher<Intent> videoGetter;
|
||||
// Live camera demo UI and camera components.
|
||||
private CameraInput cameraInput;
|
||||
private SolutionGlSurfaceView<FaceMeshResult> glSurfaceView;
|
||||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_main);
|
||||
setupStaticImageDemoUiComponents();
|
||||
setupVideoDemoUiComponents();
|
||||
setupLiveDemoUiComponents();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
// Restarts the camera and the opengl surface rendering.
|
||||
cameraInput = new CameraInput(this);
|
||||
cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
|
||||
glSurfaceView.post(this::startCamera);
|
||||
glSurfaceView.setVisibility(View.VISIBLE);
|
||||
} else if (inputSource == InputSource.VIDEO) {
|
||||
videoInput.resume();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPause() {
|
||||
super.onPause();
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
glSurfaceView.setVisibility(View.GONE);
|
||||
cameraInput.close();
|
||||
} else if (inputSource == InputSource.VIDEO) {
|
||||
videoInput.pause();
|
||||
}
|
||||
}
|
||||
|
||||
/** Sets up the UI components for the static image demo. */
|
||||
private void setupStaticImageDemoUiComponents() {
|
||||
// The Intent to access gallery and read images as bitmap.
|
||||
imageGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null) {
|
||||
if (result.getResultCode() == RESULT_OK) {
|
||||
Bitmap bitmap = null;
|
||||
try {
|
||||
bitmap =
|
||||
MediaStore.Images.Media.getBitmap(
|
||||
this.getContentResolver(), resultIntent.getData());
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Bitmap reading error:" + e);
|
||||
}
|
||||
if (bitmap != null) {
|
||||
facemesh.send(bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
Button loadImageButton = findViewById(R.id.button_load_picture);
|
||||
loadImageButton.setOnClickListener(
|
||||
v -> {
|
||||
if (inputSource != InputSource.IMAGE) {
|
||||
stopCurrentPipeline();
|
||||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
});
|
||||
imageView = new FaceMeshResultImageView(this);
|
||||
}
|
||||
|
||||
/** The core MediaPipe FaceMesh setup workflow for its static image mode. */
|
||||
private void setupStaticImageModePipeline() {
|
||||
this.inputSource = InputSource.IMAGE;
|
||||
// Initializes a new MediaPipe FaceMesh instance in the static image mode.
|
||||
facemesh =
|
||||
new FaceMesh(
|
||||
this,
|
||||
FaceMeshOptions.builder()
|
||||
.setMode(FaceMeshOptions.STATIC_IMAGE_MODE)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
|
||||
// Connects MediaPipe FaceMesh to the user-defined FaceMeshResultImageView.
|
||||
facemesh.setResultListener(
|
||||
faceMeshResult -> {
|
||||
logNoseLandmark(faceMeshResult, /*showPixelValues=*/ true);
|
||||
imageView.setFaceMeshResult(faceMeshResult);
|
||||
runOnUiThread(() -> imageView.update());
|
||||
});
|
||||
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
|
||||
|
||||
// Updates the preview layout.
|
||||
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
|
||||
frameLayout.removeAllViewsInLayout();
|
||||
imageView.setImageDrawable(null);
|
||||
frameLayout.addView(imageView);
|
||||
imageView.setVisibility(View.VISIBLE);
|
||||
}
|
||||
|
||||
/** Sets up the UI components for the video demo. */
|
||||
private void setupVideoDemoUiComponents() {
|
||||
// The Intent to access gallery and read a video file.
|
||||
videoGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null) {
|
||||
if (result.getResultCode() == RESULT_OK) {
|
||||
glSurfaceView.post(
|
||||
() ->
|
||||
videoInput.start(
|
||||
this,
|
||||
resultIntent.getData(),
|
||||
facemesh.getGlContext(),
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight()));
|
||||
}
|
||||
}
|
||||
});
|
||||
Button loadVideoButton = findViewById(R.id.button_load_video);
|
||||
loadVideoButton.setOnClickListener(
|
||||
v -> {
|
||||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
});
|
||||
}
|
||||
|
||||
/** Sets up the UI components for the live demo with camera input. */
|
||||
private void setupLiveDemoUiComponents() {
|
||||
Button startCameraButton = findViewById(R.id.button_start_camera);
|
||||
startCameraButton.setOnClickListener(
|
||||
v -> {
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
return;
|
||||
}
|
||||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.CAMERA);
|
||||
});
|
||||
}
|
||||
|
||||
/** The core MediaPipe FaceMesh setup workflow for its streaming mode. */
|
||||
private void setupStreamingModePipeline(InputSource inputSource) {
|
||||
this.inputSource = inputSource;
|
||||
// Initializes a new MediaPipe FaceMesh instance in the streaming mode.
|
||||
facemesh =
|
||||
new FaceMesh(
|
||||
this,
|
||||
FaceMeshOptions.builder()
|
||||
.setMode(FaceMeshOptions.STREAMING_MODE)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
|
||||
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
|
||||
cameraInput = new CameraInput(this);
|
||||
cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
|
||||
} else if (inputSource == InputSource.VIDEO) {
|
||||
// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
|
||||
videoInput = new VideoInput(this);
|
||||
videoInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
|
||||
}
|
||||
|
||||
// Initializes a new Gl surface view with a user-defined FaceMeshResultGlRenderer.
|
||||
glSurfaceView =
|
||||
new SolutionGlSurfaceView<>(this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
|
||||
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
|
||||
glSurfaceView.setRenderInputImage(true);
|
||||
facemesh.setResultListener(
|
||||
faceMeshResult -> {
|
||||
logNoseLandmark(faceMeshResult, /*showPixelValues=*/ false);
|
||||
glSurfaceView.setRenderData(faceMeshResult);
|
||||
glSurfaceView.requestRender();
|
||||
});
|
||||
|
||||
// The runnable to start camera after the gl surface view is attached.
|
||||
// For video input source, videoInput.start() will be called when the video uri is available.
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
glSurfaceView.post(this::startCamera);
|
||||
}
|
||||
|
||||
// Updates the preview layout.
|
||||
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
|
||||
imageView.setVisibility(View.GONE);
|
||||
frameLayout.removeAllViewsInLayout();
|
||||
frameLayout.addView(glSurfaceView);
|
||||
glSurfaceView.setVisibility(View.VISIBLE);
|
||||
frameLayout.requestLayout();
|
||||
}
|
||||
|
||||
private void startCamera() {
|
||||
cameraInput.start(
|
||||
this,
|
||||
facemesh.getGlContext(),
|
||||
CameraInput.CameraFacing.FRONT,
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight());
|
||||
}
|
||||
|
||||
private void stopCurrentPipeline() {
|
||||
if (cameraInput != null) {
|
||||
cameraInput.setNewFrameListener(null);
|
||||
cameraInput.close();
|
||||
}
|
||||
if (videoInput != null) {
|
||||
videoInput.setNewFrameListener(null);
|
||||
videoInput.close();
|
||||
}
|
||||
if (glSurfaceView != null) {
|
||||
glSurfaceView.setVisibility(View.GONE);
|
||||
}
|
||||
if (facemesh != null) {
|
||||
facemesh.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void logNoseLandmark(FaceMeshResult result, boolean showPixelValues) {
|
||||
if (result == null || result.multiFaceLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark noseLandmark = result.multiFaceLandmarks().get(0).getLandmarkList().get(1);
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
|
||||
if (showPixelValues) {
|
||||
int width = result.inputBitmap().getWidth();
|
||||
int height = result.inputBitmap().getHeight();
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
|
||||
noseLandmark.getX() * width, noseLandmark.getY() * height));
|
||||
} else {
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
noseLandmark.getX(), noseLandmark.getY()));
|
||||
}
|
||||
}
|
||||
}
|
1  mediapipe/examples/android/solutions/facemesh/src/main/res  Symbolic link
@@ -0,0 +1 @@
../../../res
|
|
@ -19,7 +19,8 @@
|
|||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<activity android:name=".MainActivity">
|
||||
<activity android:name=".MainActivity"
|
||||
android:screenOrientation="portrait">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
|
|
|
@ -31,10 +31,14 @@ android_binary(
|
|||
"//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
|
||||
"//mediapipe/java/com/google/mediapipe/solutions/hands",
|
||||
"//third_party:androidx_appcompat",
|
||||
"//third_party:androidx_constraint_layout",
|
||||
"//third_party:opencv",
|
||||
"@maven//:androidx_activity_activity",
|
||||
"@maven//:androidx_concurrent_concurrent_futures",
|
||||
"@maven//:androidx_fragment_fragment",
|
||||
"@maven//:com_google_guava_guava",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -46,7 +46,6 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
private int positionHandle;
|
||||
private int transformMatrixHandle;
|
||||
private final float[] transformMatrix = new float[16];
|
||||
private FloatBuffer vertexBuffer;
|
||||
|
||||
private int loadShader(int type, String shaderCode) {
|
||||
int shader = GLES20.glCreateShader(type);
|
||||
|
@ -74,12 +73,15 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
}
|
||||
GLES20.glUseProgram(program);
|
||||
// Sets the transform matrix to align the result rendering with the scaled output texture.
|
||||
// Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
|
||||
// bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
|
||||
// top-left corner.
|
||||
Matrix.setIdentityM(transformMatrix, 0);
|
||||
Matrix.scaleM(
|
||||
transformMatrix,
|
||||
0,
|
||||
2 / (boundary.right() - boundary.left()),
|
||||
2 / (boundary.top() - boundary.bottom()),
|
||||
-2 / (boundary.top() - boundary.bottom()),
|
||||
1.0f);
|
||||
GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
|
||||
GLES20.glLineWidth(CONNECTION_THICKNESS);
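For intuition on the sign change above: assuming the render boundary spans [-1, 1] on both axes, the scale factors become (1, -1, 1). A landmark with y = 0 (the top of the image in MediaPipe's top-left convention) is first normalized to -1 and then scaled by -1, landing at +1, the top of the viewport in OpenGL's bottom-left convention; this is exactly the vertical flip the new comment describes.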
|
||||
|
@ -109,7 +111,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
NormalizedLandmark end = handLandmarkList.get(c.end());
|
||||
vertex[2] = normalizedLandmarkValue(end.getX());
|
||||
vertex[3] = normalizedLandmarkValue(end.getY());
|
||||
vertexBuffer =
|
||||
FloatBuffer vertexBuffer =
|
||||
ByteBuffer.allocateDirect(vertex.length * 4)
|
||||
.order(ByteOrder.nativeOrder())
|
||||
.asFloatBuffer()
|
||||
|
|
|
@ -20,7 +20,7 @@ import android.graphics.Canvas;
|
|||
import android.graphics.Color;
|
||||
import android.graphics.Matrix;
|
||||
import android.graphics.Paint;
|
||||
import android.widget.ImageView;
|
||||
import androidx.appcompat.widget.AppCompatImageView;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutions.hands.Hands;
|
||||
|
@ -28,17 +28,18 @@ import com.google.mediapipe.solutions.hands.HandsResult;
|
|||
import java.util.List;
|
||||
|
||||
/** An ImageView implementation for displaying MediaPipe Hands results. */
|
||||
public class HandsResultImageView extends ImageView {
|
||||
public class HandsResultImageView extends AppCompatImageView {
|
||||
private static final String TAG = "HandsResultImageView";
|
||||
|
||||
private static final int LANDMARK_COLOR = Color.RED;
|
||||
private static final int LANDMARK_RADIUS = 15;
|
||||
private static final int CONNECTION_COLOR = Color.GREEN;
|
||||
private static final int CONNECTION_THICKNESS = 10;
|
||||
private Bitmap latest;
|
||||
|
||||
public HandsResultImageView(Context context) {
|
||||
super(context);
|
||||
setScaleType(ImageView.ScaleType.FIT_CENTER);
|
||||
setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -54,8 +55,8 @@ public class HandsResultImageView extends ImageView {
|
|||
Bitmap bmInput = result.inputBitmap();
|
||||
int width = bmInput.getWidth();
|
||||
int height = bmInput.getHeight();
|
||||
Bitmap bmOutput = Bitmap.createBitmap(width, height, bmInput.getConfig());
|
||||
Canvas canvas = new Canvas(bmOutput);
|
||||
latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
|
||||
Canvas canvas = new Canvas(latest);
|
||||
|
||||
canvas.drawBitmap(bmInput, new Matrix(), null);
|
||||
int numHands = result.multiHandLandmarks().size();
|
||||
|
@ -63,8 +64,14 @@ public class HandsResultImageView extends ImageView {
|
|||
drawLandmarksOnCanvas(
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
|
||||
}
|
||||
}
|
||||
|
||||
/** Updates the image view with the latest hands result. */
|
||||
public void update() {
|
||||
postInvalidate();
|
||||
setImageBitmap(bmOutput);
|
||||
if (latest != null) {
|
||||
setImageBitmap(latest);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Better hand landmark and hand connection drawing.
|
||||
|
|
|
@ -28,6 +28,7 @@ import androidx.activity.result.contract.ActivityResultContracts;
|
|||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutioncore.CameraInput;
|
||||
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
|
||||
import com.google.mediapipe.solutioncore.VideoInput;
|
||||
import com.google.mediapipe.solutions.hands.HandLandmark;
|
||||
import com.google.mediapipe.solutions.hands.Hands;
|
||||
import com.google.mediapipe.solutions.hands.HandsOptions;
|
||||
|
@ -39,14 +40,24 @@ public class MainActivity extends AppCompatActivity {
|
|||
private static final String TAG = "MainActivity";
|
||||
|
||||
private Hands hands;
|
||||
private int mode = HandsOptions.STATIC_IMAGE_MODE;
|
||||
// Run the pipeline and the model inference on GPU or CPU.
|
||||
private static final boolean RUN_ON_GPU = true;
|
||||
|
||||
private enum InputSource {
|
||||
UNKNOWN,
|
||||
IMAGE,
|
||||
VIDEO,
|
||||
CAMERA,
|
||||
}
|
||||
private InputSource inputSource = InputSource.UNKNOWN;
|
||||
|
||||
// Image demo UI and image loader components.
|
||||
private Button loadImageButton;
|
||||
private ActivityResultLauncher<Intent> imageGetter;
|
||||
private HandsResultImageView imageView;
|
||||
|
||||
// Video demo UI and video loader components.
|
||||
private VideoInput videoInput;
|
||||
private ActivityResultLauncher<Intent> videoGetter;
|
||||
// Live camera demo UI and camera components.
|
||||
private Button startCameraButton;
|
||||
private CameraInput cameraInput;
|
||||
private SolutionGlSurfaceView<HandsResult> glSurfaceView;
|
||||
|
||||
|
@ -55,26 +66,32 @@ public class MainActivity extends AppCompatActivity {
|
|||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_main);
|
||||
setupStaticImageDemoUiComponents();
|
||||
setupVideoDemoUiComponents();
|
||||
setupLiveDemoUiComponents();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onResume() {
|
||||
super.onResume();
|
||||
if (mode == HandsOptions.STREAMING_MODE) {
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
// Restarts the camera and the opengl surface rendering.
|
||||
cameraInput = new CameraInput(this);
|
||||
cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame));
|
||||
cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
|
||||
glSurfaceView.post(this::startCamera);
|
||||
glSurfaceView.setVisibility(View.VISIBLE);
|
||||
} else if (inputSource == InputSource.VIDEO) {
|
||||
videoInput.resume();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPause() {
|
||||
super.onPause();
|
||||
if (mode == HandsOptions.STREAMING_MODE) {
|
||||
stopLiveDemo();
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
glSurfaceView.setVisibility(View.GONE);
|
||||
cameraInput.close();
|
||||
} else if (inputSource == InputSource.VIDEO) {
|
||||
videoInput.pause();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -102,80 +119,122 @@ public class MainActivity extends AppCompatActivity {
|
|||
}
|
||||
}
|
||||
});
|
||||
loadImageButton = (Button) findViewById(R.id.button_load_picture);
|
||||
Button loadImageButton = findViewById(R.id.button_load_picture);
|
||||
loadImageButton.setOnClickListener(
|
||||
new View.OnClickListener() {
|
||||
@Override
|
||||
public void onClick(View v) {
|
||||
if (mode == HandsOptions.STREAMING_MODE) {
|
||||
stopLiveDemo();
|
||||
}
|
||||
if (hands == null || mode != HandsOptions.STATIC_IMAGE_MODE) {
|
||||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
v -> {
|
||||
if (inputSource != InputSource.IMAGE) {
|
||||
stopCurrentPipeline();
|
||||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
});
|
||||
imageView = new HandsResultImageView(this);
|
||||
}
|
||||
|
||||
/** The core MediaPipe Hands setup workflow for its static image mode. */
|
||||
private void setupStaticImageModePipeline() {
|
||||
this.inputSource = InputSource.IMAGE;
|
||||
// Initializes a new MediaPipe Hands instance in the static image mode.
|
||||
mode = HandsOptions.STATIC_IMAGE_MODE;
|
||||
if (hands != null) {
|
||||
hands.close();
|
||||
}
|
||||
hands = new Hands(this, HandsOptions.builder().setMode(mode).build());
|
||||
hands =
|
||||
new Hands(
|
||||
this,
|
||||
HandsOptions.builder()
|
||||
.setMode(HandsOptions.STATIC_IMAGE_MODE)
|
||||
.setMaxNumHands(1)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
|
||||
// Connects MediaPipe Hands to the user-defined HandsResultImageView.
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
logWristLandmark(handsResult, /*showPixelValues=*/ true);
|
||||
runOnUiThread(() -> imageView.setHandsResult(handsResult));
|
||||
imageView.setHandsResult(handsResult);
|
||||
runOnUiThread(() -> imageView.update());
|
||||
});
|
||||
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message));
|
||||
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
||||
// Updates the preview layout.
|
||||
FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout);
|
||||
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
|
||||
frameLayout.removeAllViewsInLayout();
|
||||
imageView.setImageDrawable(null);
|
||||
frameLayout.addView(imageView);
|
||||
imageView.setVisibility(View.VISIBLE);
|
||||
}
|
||||
|
||||
/** Sets up the UI components for the video demo. */
|
||||
private void setupVideoDemoUiComponents() {
|
||||
// The Intent to access gallery and read a video file.
|
||||
videoGetter =
|
||||
registerForActivityResult(
|
||||
new ActivityResultContracts.StartActivityForResult(),
|
||||
result -> {
|
||||
Intent resultIntent = result.getData();
|
||||
if (resultIntent != null) {
|
||||
if (result.getResultCode() == RESULT_OK) {
|
||||
glSurfaceView.post(
|
||||
() ->
|
||||
videoInput.start(
|
||||
this,
|
||||
resultIntent.getData(),
|
||||
hands.getGlContext(),
|
||||
glSurfaceView.getWidth(),
|
||||
glSurfaceView.getHeight()));
|
||||
}
|
||||
}
|
||||
});
|
||||
Button loadVideoButton = findViewById(R.id.button_load_video);
|
||||
loadVideoButton.setOnClickListener(
|
||||
v -> {
|
||||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
});
|
||||
}
|
||||
|
||||
/** Sets up the UI components for the live demo with camera input. */
|
||||
private void setupLiveDemoUiComponents() {
|
||||
startCameraButton = (Button) findViewById(R.id.button_start_camera);
|
||||
Button startCameraButton = findViewById(R.id.button_start_camera);
|
||||
startCameraButton.setOnClickListener(
|
||||
new View.OnClickListener() {
|
||||
@Override
|
||||
public void onClick(View v) {
|
||||
if (hands == null || mode != HandsOptions.STREAMING_MODE) {
|
||||
setupStreamingModePipeline();
|
||||
}
|
||||
v -> {
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
return;
|
||||
}
|
||||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.CAMERA);
|
||||
});
|
||||
}
|
||||
|
||||
/** The core MediaPipe Hands setup workflow for its streaming mode. */
|
||||
private void setupStreamingModePipeline() {
|
||||
private void setupStreamingModePipeline(InputSource inputSource) {
|
||||
this.inputSource = inputSource;
|
||||
// Initializes a new MediaPipe Hands instance in the streaming mode.
|
||||
mode = HandsOptions.STREAMING_MODE;
|
||||
if (hands != null) {
|
||||
hands.close();
|
||||
hands =
|
||||
new Hands(
|
||||
this,
|
||||
HandsOptions.builder()
|
||||
.setMode(HandsOptions.STREAMING_MODE)
|
||||
.setMaxNumHands(1)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
|
||||
cameraInput = new CameraInput(this);
|
||||
cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
|
||||
} else if (inputSource == InputSource.VIDEO) {
|
||||
// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
|
||||
videoInput = new VideoInput(this);
|
||||
videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
|
||||
}
|
||||
hands = new Hands(this, HandsOptions.builder().setMode(mode).build());
|
||||
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message));
|
||||
|
||||
// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
|
||||
cameraInput = new CameraInput(this);
|
||||
cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame));
|
||||
|
||||
// Initalizes a new Gl surface view with a user-defined HandsResultGlRenderer.
|
||||
// Initializes a new Gl surface view with a user-defined HandsResultGlRenderer.
|
||||
glSurfaceView =
|
||||
new SolutionGlSurfaceView<>(this, hands.getGlContext(), hands.getGlMajorVersion());
|
||||
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
|
||||
|
@ -188,10 +247,13 @@ public class MainActivity extends AppCompatActivity {
|
|||
});
|
||||
|
||||
// The runnable to start camera after the gl surface view is attached.
|
||||
glSurfaceView.post(this::startCamera);
|
||||
// For video input source, videoInput.start() will be called when the video uri is available.
|
||||
if (inputSource == InputSource.CAMERA) {
|
||||
glSurfaceView.post(this::startCamera);
|
||||
}
|
||||
|
||||
// Updates the preview layout.
|
||||
FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout);
|
||||
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
|
||||
imageView.setVisibility(View.GONE);
|
||||
frameLayout.removeAllViewsInLayout();
|
||||
frameLayout.addView(glSurfaceView);
|
||||
|
@ -208,34 +270,40 @@ public class MainActivity extends AppCompatActivity {
|
|||
glSurfaceView.getHeight());
|
||||
}
|
||||
|
||||
private void stopLiveDemo() {
|
||||
private void stopCurrentPipeline() {
|
||||
if (cameraInput != null) {
|
||||
cameraInput.stop();
|
||||
cameraInput.setNewFrameListener(null);
|
||||
cameraInput.close();
|
||||
}
|
||||
if (videoInput != null) {
|
||||
videoInput.setNewFrameListener(null);
|
||||
videoInput.close();
|
||||
}
|
||||
if (glSurfaceView != null) {
|
||||
glSurfaceView.setVisibility(View.GONE);
|
||||
}
|
||||
if (hands != null) {
|
||||
hands.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void logWristLandmark(HandsResult result, boolean showPixelValues) {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normoralized cooridanates.
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
|
||||
if (showPixelValues) {
|
||||
int width = result.inputBitmap().getWidth();
|
||||
int height = result.inputBitmap().getHeight();
|
||||
Log.i(
|
||||
TAG,
|
||||
"MediaPipe Hand wrist coordinates (pixel values): x= "
|
||||
+ wristLandmark.getX() * width
|
||||
+ " y="
|
||||
+ wristLandmark.getY() * height);
|
||||
String.format(
|
||||
"MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
|
||||
wristLandmark.getX() * width, wristLandmark.getY() * height));
|
||||
} else {
|
||||
Log.i(
|
||||
TAG,
|
||||
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x= "
|
||||
+ wristLandmark.getX()
|
||||
+ " y="
|
||||
+ wristLandmark.getY());
|
||||
String.format(
|
||||
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
wristLandmark.getX(), wristLandmark.getY()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
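For easier reading, here is a condensed sketch of the live-input wiring shown in the hunks above. It assumes the surrounding MainActivity fields (`hands`, `cameraInput`, `videoInput`, `glSurfaceView`, `inputSource`) and constants (`TAG`, `RUN_ON_GPU`), and omits the other `HandsOptions` setters; it is a sketch of the calls above, not the complete file.

```java
// Create the Hands solution and report pipeline errors to logcat.
hands = new Hands(this, HandsOptions.builder().setRunOnGpu(RUN_ON_GPU).build());
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));

if (inputSource == InputSource.CAMERA) {
  // Camera frames are forwarded straight into the Hands solution graph.
  cameraInput = new CameraInput(this);
  cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
} else if (inputSource == InputSource.VIDEO) {
  // Decoded video frames feed the same graph.
  videoInput = new VideoInput(this);
  videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
}

// Results are rendered on a GL surface view that shares the solution's GL context.
glSurfaceView =
    new SolutionGlSurfaceView<>(this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());

// Start the camera only after the surface view is attached; for video input,
// videoInput.start() is called once the video URI is available.
if (inputSource == InputSource.CAMERA) {
  glSurfaceView.post(this::startCamera);
}
```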
@ -8,18 +8,23 @@
|
|||
android:id="@+id/buttons"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:gravity="center"
|
||||
style="?android:attr/buttonBarStyle" android:gravity="center"
|
||||
android:orientation="horizontal">
|
||||
<Button
|
||||
android:id="@+id/button_load_picture"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:text="Load Picture" />
|
||||
style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
|
||||
android:text="@string/load_picture" />
|
||||
<Button
|
||||
android:id="@+id/button_load_video"
|
||||
android:layout_width="wrap_content"
|
||||
style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
|
||||
android:text="@string/load_video" />
|
||||
<Button
|
||||
android:id="@+id/button_start_camera"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:text="Start Camera" />
|
||||
style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
|
||||
android:text="@string/start_camera" />
|
||||
</LinearLayout>
|
||||
<FrameLayout
|
||||
android:id="@+id/preview_display_layout"
|
||||
|
@ -27,9 +32,9 @@
|
|||
android:layout_height="match_parent">
|
||||
<TextView
|
||||
android:id="@+id/no_view"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="wrap_content"
|
||||
android:gravity="center"
|
||||
android:text="Please press any button above to start" />
|
||||
android:text="@string/instruction" />
|
||||
</FrameLayout>
|
||||
</LinearLayout>
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
<resources>
|
||||
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
|
||||
<string name="load_picture" translatable="false">Load Picture</string>
|
||||
<string name="load_video" translatable="false">Load Video</string>
|
||||
<string name="start_camera" translatable="false">Start Camera</string>
|
||||
<string name="instruction" translatable="false">Please press any button above to start</string>
|
||||
</resources>
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
rootProject.name = "mediapipe-solutions-examples"
|
||||
include ':hands'
|
||||
include ':facemesh'
|
||||
|
|
|
@ -169,6 +169,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
|
||||
public void startCamera() {
|
||||
cameraHelper = new CameraXPreviewHelper();
|
||||
previewFrameTexture = converter.getSurfaceTexture();
|
||||
cameraHelper.setOnCameraStartedListener(
|
||||
surfaceTexture -> {
|
||||
onCameraStarted(surfaceTexture);
|
||||
|
@ -178,7 +179,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
? CameraHelper.CameraFacing.FRONT
|
||||
: CameraHelper.CameraFacing.BACK;
|
||||
cameraHelper.startCamera(
|
||||
this, cameraFacing, /*unusedSurfaceTexture=*/ null, cameraTargetResolution());
|
||||
this, cameraFacing, previewFrameTexture, cameraTargetResolution());
|
||||
}
|
||||
|
||||
protected Size computeViewSize(int width, int height) {
|
||||
|
@ -194,11 +195,8 @@ public class MainActivity extends AppCompatActivity {
|
|||
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
|
||||
boolean isCameraRotated = cameraHelper.isCameraRotated();
|
||||
|
||||
// Connect the converter to the camera-preview frames as its input (via
|
||||
// previewFrameTexture), and configure the output width and height as the computed
|
||||
// display size.
|
||||
converter.setSurfaceTextureAndAttachToGLContext(
|
||||
previewFrameTexture,
|
||||
// Configure the output width and height as the computed display size.
|
||||
converter.setDestinationSize(
|
||||
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
|
||||
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
|
||||
}
|
||||
|
|
|
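For context, a short sketch of the updated CameraX preview flow after this change: the camera helper now receives the converter's SurfaceTexture up front, and the converter only needs its destination size configured. Field and method names (`cameraHelper`, `converter`, `previewFrameTexture`, `onCameraStarted`, `cameraTargetResolution`) come from the surrounding MainActivity; `cameraFacing` and the helper method name below are assumptions for illustration.

```java
public void startCamera() {
  cameraHelper = new CameraXPreviewHelper();
  // The converter's SurfaceTexture is handed to the camera helper directly,
  // instead of passing an unused texture and attaching it later.
  previewFrameTexture = converter.getSurfaceTexture();
  cameraHelper.setOnCameraStartedListener(
      surfaceTexture -> {
        onCameraStarted(surfaceTexture);
      });
  // cameraFacing is chosen as in the hunk above (FRONT or BACK).
  cameraHelper.startCamera(
      this, cameraFacing, previewFrameTexture, cameraTargetResolution());
}

// Hypothetical helper name; in the real file this logic lives in the
// surface-changed handler shown in the hunk above.
private void resizeConverterOutput(Size viewSize) {
  Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
  boolean isCameraRotated = cameraHelper.isCameraRotated();
  // Configure the converter's output width and height as the computed display size.
  converter.setDestinationSize(
      isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
      isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
}
```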
@ -43,6 +43,7 @@ cc_library(
|
|||
cc_binary(
|
||||
name = "object_detection_tpu",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/examples/coral:demo_run_graph_main",
|
||||
"//mediapipe/graphs/object_detection:desktop_tflite_calculators",
|
||||
],
|
||||
|
@ -51,6 +52,12 @@ cc_binary(
|
|||
cc_binary(
|
||||
name = "face_detection_tpu",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_converter_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_inference_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
|
||||
"//mediapipe/examples/coral:demo_run_graph_main",
|
||||
"//mediapipe/graphs/face_detection:desktop_live_calculators",
|
||||
],
|
||||
|
|
|
@ -1,86 +0,0 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#==== ! Prerequisite ! ====
|
||||
# $ sh mediapipe/examples/coral/setup.sh
|
||||
#====
|
||||
|
||||
# for opencv 3.2 default
|
||||
FROM ubuntu:18.04
|
||||
|
||||
MAINTAINER <mediapipe@google.com>
|
||||
|
||||
WORKDIR /mediapipe
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install MediaPipe & Coral deps
|
||||
|
||||
COPY update_sources.sh /
|
||||
RUN /update_sources.sh
|
||||
|
||||
RUN dpkg --add-architecture armhf
|
||||
RUN dpkg --add-architecture arm64
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
crossbuild-essential-arm64 \
|
||||
libusb-1.0-0-dev:arm64 \
|
||||
zlibc:arm64 \
|
||||
pkg-config \
|
||||
zip \
|
||||
unzip \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
python \
|
||||
python-pip \
|
||||
python3-pip \
|
||||
python-numpy \
|
||||
vim-common \
|
||||
ca-certificates \
|
||||
emacs \
|
||||
software-properties-common && \
|
||||
add-apt-repository -y ppa:openjdk-r/ppa && \
|
||||
apt-get update && apt-get install -y openjdk-8-jdk
|
||||
|
||||
RUN pip install --upgrade setuptools
|
||||
RUN pip install future
|
||||
RUN pip3 install six
|
||||
|
||||
COPY . /mediapipe/
|
||||
|
||||
# Install bazel
|
||||
# Please match the current MediaPipe Bazel requirements according to docs.
|
||||
ARG BAZEL_VERSION=3.7.2
|
||||
RUN mkdir /bazel && \
|
||||
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
|
||||
wget --no-check-certificate -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
|
||||
chmod +x /bazel/installer.sh && \
|
||||
/bazel/installer.sh && \
|
||||
rm -f /bazel/installer.sh
|
||||
|
||||
# OpenCV (3.2 default in 18.04)
|
||||
|
||||
RUN apt-get update && apt-get install -y libopencv-dev
|
||||
|
||||
# Opencv libs copied from coral device into opencv32_arm64_libs
|
||||
|
||||
RUN cp opencv32_arm64_libs/* /usr/lib/aarch64-linux-gnu/.
|
||||
|
||||
# Edge tpu header and lib
|
||||
|
||||
RUN git clone https://github.com/google-coral/edgetpu.git /edgetpu
|
||||
RUN cp /edgetpu/libedgetpu/direct/aarch64/libedgetpu.so.1.0 /usr/lib/aarch64-linux-gnu/libedgetpu.so
|
||||
|
||||
# See mediapipe/examples/coral/README.md to finish setup
|
mediapipe/examples/coral/Dockerfile.amd64 (new file, 45 lines)
|
@ -0,0 +1,45 @@
|
|||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
FROM debian:buster
|
||||
MAINTAINER <mediapipe@google.com>
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
build-essential \
|
||||
crossbuild-essential-arm64 \
|
||||
pkg-config \
|
||||
zip \
|
||||
unzip \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
tree \
|
||||
vim \
|
||||
sudo \
|
||||
python3-all \
|
||||
python3-pip \
|
||||
python3-numpy \
|
||||
ca-certificates \
|
||||
software-properties-common \
|
||||
libusb-1.0-0-dev \
|
||||
libopencv-core-dev \
|
||||
libopencv-imgproc-dev \
|
||||
libopencv-video-dev \
|
||||
libopencv-highgui-dev \
|
||||
libopencv-videoio-dev \
|
||||
libopencv-contrib-dev
|
||||
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
|
||||
RUN wget -O /usr/bin/bazel \
|
||||
https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
|
||||
echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" sha256sum --check - && \
|
||||
chmod +x /usr/bin/bazel
|
||||
ENV BAZEL_CPU=k8
|
mediapipe/examples/coral/Dockerfile.arm64 (new file, 47 lines)
|
@ -0,0 +1,47 @@
|
|||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
FROM debian:buster
|
||||
MAINTAINER <mediapipe@google.com>
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
build-essential \
|
||||
crossbuild-essential-arm64 \
|
||||
pkg-config \
|
||||
zip \
|
||||
unzip \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
tree \
|
||||
vim \
|
||||
sudo \
|
||||
python3-all \
|
||||
python3-pip \
|
||||
python3-numpy \
|
||||
ca-certificates \
|
||||
software-properties-common
|
||||
RUN dpkg --add-architecture arm64
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
libusb-1.0-0-dev:arm64 \
|
||||
libopencv-core-dev:arm64 \
|
||||
libopencv-imgproc-dev:arm64 \
|
||||
libopencv-video-dev:arm64 \
|
||||
libopencv-highgui-dev:arm64 \
|
||||
libopencv-videoio-dev:arm64 \
|
||||
libopencv-contrib-dev:arm64
|
||||
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
|
||||
RUN wget -O /usr/bin/bazel \
|
||||
https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
|
||||
echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" sha256sum --check - && \
|
||||
chmod +x /usr/bin/bazel
|
||||
ENV BAZEL_CPU=aarch64
|
mediapipe/examples/coral/Dockerfile.armhf (new file, 47 lines)
|
@ -0,0 +1,47 @@
|
|||
# Copyright 2021 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
FROM debian:buster
|
||||
MAINTAINER <mediapipe@google.com>
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
build-essential \
|
||||
crossbuild-essential-armhf \
|
||||
pkg-config \
|
||||
zip \
|
||||
unzip \
|
||||
curl \
|
||||
wget \
|
||||
git \
|
||||
tree \
|
||||
vim \
|
||||
sudo \
|
||||
python3-all \
|
||||
python3-pip \
|
||||
python3-numpy \
|
||||
ca-certificates \
|
||||
software-properties-common
|
||||
RUN dpkg --add-architecture armhf
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
libusb-1.0-0-dev:armhf \
|
||||
libopencv-core-dev:armhf \
|
||||
libopencv-imgproc-dev:armhf \
|
||||
libopencv-video-dev:armhf \
|
||||
libopencv-highgui-dev:armhf \
|
||||
libopencv-videoio-dev:armhf \
|
||||
libopencv-contrib-dev:armhf
|
||||
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
|
||||
RUN wget -O /usr/bin/bazel \
|
||||
https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
|
||||
echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" sha256sum --check - && \
|
||||
chmod +x /usr/bin/bazel
|
||||
ENV BAZEL_CPU=armv7a
|
mediapipe/examples/coral/Makefile (new file, 55 lines)
|
@ -0,0 +1,55 @@
|
|||
SHELL := /bin/bash
|
||||
|
||||
MAKEFILE_DIR := $(realpath $(dir $(lastword $(MAKEFILE_LIST))))
|
||||
MEDIAPIPE_DIR := $(MAKEFILE_DIR)/../../..
|
||||
|
||||
BAZEL_COMPILATION_MODE ?= opt
|
||||
BAZEL_TARGET ?= mediapipe/examples/coral:face_detection_tpu
|
||||
BAZEL_CPU ?= k8
|
||||
|
||||
OUT_DIR := $(MEDIAPIPE_DIR)/out/$(BAZEL_CPU)
|
||||
|
||||
PLATFORM ?= amd64
|
||||
DOCKER_FILE ?= $(MAKEFILE_DIR)/Dockerfile.$(PLATFORM)
|
||||
DOCKER_COMMAND ?=
|
||||
|
||||
bazel_output = $(MEDIAPIPE_DIR)/bazel-bin/$(subst :,/,$(1))
|
||||
|
||||
define run_command
|
||||
chmod a+w /; \
|
||||
groupadd --gid $(shell id -g) $(shell id -g -n); \
|
||||
useradd -m -e '' -s /bin/bash --gid $(shell id -g) --uid $(shell id -u) $(shell id -u -n); \
|
||||
echo '$(shell id -u -n) ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers; \
|
||||
su $(shell id -u -n) $(if $(1),-c '$(1)',)
|
||||
endef
|
||||
|
||||
.PHONY: help
|
||||
help:
|
||||
@echo "make help - Print help"
|
||||
@echo "make docker - Run Docker environment"
|
||||
@echo "make build - Run Bazel build, use BAZEL_TARGET to choose which target to build"
|
||||
|
||||
ifeq (,$(wildcard /.dockerenv))
|
||||
.PHONY: docker
|
||||
docker:
|
||||
docker run --rm -i --tty \
|
||||
-v $(MEDIAPIPE_DIR):/mediapipe \
|
||||
--workdir /mediapipe/ \
|
||||
$(shell docker build -q - < $(DOCKER_FILE)) \
|
||||
/bin/bash -c "$(call run_command,$(DOCKER_COMMAND))"
|
||||
endif
|
||||
|
||||
.PHONY: build
|
||||
build:
|
||||
(cd $(MEDIAPIPE_DIR) && \
|
||||
bazel build \
|
||||
--crosstool_top=@crosstool//:toolchains \
|
||||
--compiler=gcc \
|
||||
--cpu=${BAZEL_CPU} \
|
||||
--compilation_mode=${BAZEL_COMPILATION_MODE} \
|
||||
--define darwinn_portable=1 \
|
||||
--define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
--define MEDIAPIPE_EDGE_TPU=all \
|
||||
$(BAZEL_TARGET) && \
|
||||
mkdir -p $(OUT_DIR) && \
|
||||
cp -f $(call bazel_output,$(BAZEL_TARGET)) $(OUT_DIR))
|
|
@ -1,156 +1,173 @@
|
|||
# Coral Dev Board Setup (experimental)
|
||||
# Coral Support
|
||||
|
||||
**Disclaimer**: Running MediaPipe on Coral is experimental, and this process may
|
||||
not be exact and is subject to change. These instructions have only been tested
|
||||
on the [Coral Dev Board](https://coral.ai/products/dev-board/)
|
||||
running [Mendel Enterprise Day 13](https://coral.ai/software/) OS and
|
||||
using [Diploria2](https://github.com/google-coral/edgetpu/tree/diploria2)
|
||||
edgetpu libs, and may vary for different devices and workstations.
|
||||
## Bazel Setup
|
||||
|
||||
This file describes how to prepare a Coral Dev Board and setup a Linux
|
||||
Docker container for building MediaPipe applications that run on Edge TPU.
|
||||
You can compile MediaPipe with Edge TPU support enabled to run
|
||||
[Coral models](http://coral.ai/models). Just add
|
||||
`--define MEDIAPIPE_EDGE_TPU=<type>` to the `bazel` command:
|
||||
|
||||
## Before creating the Docker
|
||||
* `--define MEDIAPIPE_EDGE_TPU=usb` for Coral USB devices on Linux and macOS
|
||||
* `--define MEDIAPIPE_EDGE_TPU=pci` for Coral PCIe devices on Linux
|
||||
* `--define MEDIAPIPE_EDGE_TPU=all` for both Coral USB and PCIe devices on Linux
|
||||
|
||||
* (on host machine) run _setup.sh_ from MediaPipe root directory
|
||||
You have to install the `libusb` library in order to compile with USB support:
|
||||
|
||||
sh mediapipe/examples/coral/setup.sh
|
||||
* `libusb-1.0-0-dev` on Linux
|
||||
* `libusb` on macOS via MacPorts or Homebrew
|
||||
|
||||
* Setup the coral device via [here](https://coral.withgoogle.com/docs/dev-board/get-started/), and ensure the _mdt_ command works
|
||||
Command to compile the face detection Coral example:
|
||||
|
||||
Note: alias mdt="python3 -m mdt.main" may be needed on some systems
|
||||
```bash
|
||||
bazel build \
|
||||
--compilation_mode=opt \
|
||||
--define darwinn_portable=1 \
|
||||
--define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
--define MEDIAPIPE_EDGE_TPU=usb \
|
||||
--linkopt=-l:libusb-1.0.so \
|
||||
mediapipe/examples/coral:face_detection_tpu
|
||||
```
|
||||
|
||||
* (on coral device) prepare MediaPipe
|
||||
## Cross-compilation
|
||||
|
||||
cd ~
|
||||
sudo apt-get update && sudo apt-get install -y git
|
||||
git clone https://github.com/google/mediapipe.git
|
||||
mkdir mediapipe/bazel-bin
|
||||
Sometimes you need to cross-compile MediaPipe source code, e.g. get `ARM32`
|
||||
or `ARM64` binaries on an `x86` system. Install a cross-compilation toolchain on
|
||||
your system or use our preconfigured Docker environment for that:
|
||||
|
||||
* (on coral device) install opencv 3.2
|
||||
```bash
|
||||
# For ARM32 (e.g. Raspberry Pi)
|
||||
make -C mediapipe/examples/coral PLATFORM=armhf docker
|
||||
|
||||
sudo apt-get update && sudo apt-get install -y libopencv-dev
|
||||
# For ARM64 (e.g. Coral Dev Board)
|
||||
make -C mediapipe/examples/coral PLATFORM=arm64 docker
|
||||
```
|
||||
|
||||
* (on coral device) find all opencv libs
|
||||
After running this command you'll get a shell into the Docker environment, which
|
||||
has everything ready to start compilation:
|
||||
|
||||
find /usr/lib/aarch64-linux-gnu/ -name 'libopencv*so'
|
||||
```bash
|
||||
# For ARM32 (e.g. Raspberry Pi)
|
||||
bazel build \
|
||||
--crosstool_top=@crosstool//:toolchains \
|
||||
--compiler=gcc \
|
||||
--cpu=armv7a \
|
||||
--define darwinn_portable=1 \
|
||||
--define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
--define MEDIAPIPE_EDGE_TPU=usb \
|
||||
--linkopt=-l:libusb-1.0.so \
|
||||
mediapipe/examples/coral:face_detection_tpu
|
||||
|
||||
* (on host machine) copy core opencv libs from coral device to a local folder inside MediaPipe checkout:
|
||||
# For ARM64 (e.g. Coral Dev Board)
|
||||
bazel build \
|
||||
--crosstool_top=@crosstool//:toolchains \
|
||||
--compiler=gcc \
|
||||
--cpu=aarch64 \
|
||||
--define darwinn_portable=1 \
|
||||
--define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
--define MEDIAPIPE_EDGE_TPU=usb \
|
||||
--linkopt=-l:libusb-1.0.so \
|
||||
mediapipe/examples/coral:face_detection_tpu
|
||||
```
|
||||
|
||||
# in root level mediapipe folder #
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_core.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_calib3d.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_features2d.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_highgui.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_imgcodecs.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_imgproc.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_video.so opencv32_arm64_libs
|
||||
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_videoio.so opencv32_arm64_libs
|
||||
Our Docker environment defines the `${BAZEL_CPU}` value, so you can use it directly:
|
||||
|
||||
* (on host machine) Create and start the docker environment
|
||||
```bash
|
||||
bazel build \
|
||||
--crosstool_top=@crosstool//:toolchains \
|
||||
--compiler=gcc \
|
||||
--cpu=${BAZEL_CPU} \
|
||||
--define darwinn_portable=1 \
|
||||
--define MEDIAPIPE_DISABLE_GPU=1 \
|
||||
--define MEDIAPIPE_EDGE_TPU=usb \
|
||||
--linkopt=-l:libusb-1.0.so \
|
||||
mediapipe/examples/coral:face_detection_tpu
|
||||
```
|
||||
|
||||
# from mediapipe root level directory #
|
||||
docker build -t coral .
|
||||
docker run -it --name coral coral:latest
|
||||
The command above is already defined in our `Makefile`, so you can simply run:
|
||||
|
||||
## Inside the Docker environment
|
||||
```bash
|
||||
make -C mediapipe/examples/coral \
|
||||
BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu \
|
||||
build
|
||||
```
|
||||
|
||||
* Update library paths in /mediapipe/third_party/opencv_linux.BUILD
|
||||
The output binary will be automatically copied to the `out/<platform>` directory.
|
||||
|
||||
(replace 'x86_64-linux-gnu' with 'aarch64-linux-gnu')
|
||||
You can also run compilation inside Docker environment as a single
|
||||
command:
|
||||
|
||||
"lib/aarch64-linux-gnu/libopencv_core.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_calib3d.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_features2d.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_highgui.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_imgcodecs.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_imgproc.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_video.so",
|
||||
"lib/aarch64-linux-gnu/libopencv_videoio.so",
|
||||
```bash
|
||||
make -C mediapipe/examples/coral \
|
||||
PLATFORM=armhf \
|
||||
DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
|
||||
docker
|
||||
```
|
||||
|
||||
* Attempt to build hello world (to download external deps)
|
||||
and get the output binary from the `out/<platform>` directory. Any MediaPipe target
|
||||
can be cross-compiled this way, e.g. try
|
||||
`mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu`.
|
||||
|
||||
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world:hello_world
|
||||
To summarize everything:
|
||||
|
||||
* Edit /edgetpu/libedgetpu/BUILD
|
||||
| Arch | PLATFORM | Output | Board |
|
||||
| ----- | -------------- | ----------- | -------------------------------------------------------- |
|
||||
| ARM32 | PLATFORM=armhf | out/armv7a | [Raspberry Pi](https://www.raspberrypi.org/products/) |
|
||||
| ARM64 | PLATFORM=arm64 | out/aarch64 | [Coral Dev Board](https://coral.ai/products/dev-board/) |
|
||||
|
||||
to add this build target
|
||||
## Coral Examples
|
||||
|
||||
cc_library(
|
||||
name = "lib",
|
||||
srcs = [
|
||||
"libedgetpu.so",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
There are two Coral examples in the `mediapipe/examples/coral` directory. Compile
|
||||
them for your platform:
|
||||
|
||||
* Edit /edgetpu/WORKSPACE
|
||||
```bash
|
||||
# Face detection
|
||||
make -C mediapipe/examples/coral \
|
||||
PLATFORM=armhf \
|
||||
DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
|
||||
docker
|
||||
|
||||
update /mediapipe/WORKSPACE TENSORFLOW_* variables to match what /edgetpu/WORKSPACE has:
|
||||
# Object detection
|
||||
make -C mediapipe/examples/coral \
|
||||
PLATFORM=armhf \
|
||||
DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:object_detection_tpu build" \
|
||||
docker
|
||||
```
|
||||
|
||||
grep TENSORFLOW_ /mediapipe/WORKSPACE
|
||||
grep TENSORFLOW_ /edgetpu/WORKSPACE
|
||||
Copy the output binaries along with the corresponding auxiliary files to your target
|
||||
system. You can copy the whole `mediapipe` folder for simplicity:
|
||||
|
||||
# Make sure the /mediapipe/WORKSPACE _TENSORFLOW_GIT_COMMIT and _TENSORFLOW_SHA256
|
||||
# match the /edgetpu/WORKSPACE TENSORFLOW_COMMIT and TENSORFLOW_SHA256 respectively.
|
||||
```bash
|
||||
scp -r mediapipe <user>@<host>:.
|
||||
```
|
||||
|
||||
# If they do not match, modify /mediapipe/WORKSPACE to match what /edgetpu/WORKSPACE has.
|
||||
# Also comment out the MediaPipe org_tensorflow patch section.
|
||||
OpenCV runtime libraries need to be installed on your target system:
|
||||
|
||||
* Edit /mediapipe/mediapipe/calculators/tflite/BUILD to change rules for *tflite_inference_calculator.cc*
|
||||
```bash
|
||||
sudo apt-get install -y \
|
||||
libopencv-core-dev \
|
||||
libopencv-highgui-dev \
|
||||
libopencv-calib3d-dev \
|
||||
libopencv-features2d-dev \
|
||||
libopencv-imgproc-dev \
|
||||
libopencv-video-dev
|
||||
```
|
||||
|
||||
sed -i 's/\":tflite_inference_calculator_cc_proto\",/\":tflite_inference_calculator_cc_proto\",\n\t\"@edgetpu\/\/:header\",\n\t\"@libedgetpu\/\/:lib\",/g' /mediapipe/mediapipe/calculators/tflite/BUILD
|
||||
If you are going to connect a Coral USB accelerator to your target system, then
|
||||
you'll also need the `libusb` library:
|
||||
|
||||
The above command should add
|
||||
```shell
|
||||
sudo apt-get install -y \
|
||||
libusb-1.0-0
|
||||
```
|
||||
|
||||
"@edgetpu//:header",
|
||||
"@libedgetpu//:lib",
|
||||
Connect a USB camera and a Coral device to your target system and run the copied
|
||||
binaries:
|
||||
|
||||
to the _deps_ of tflite_inference_calculator.cc
|
||||
|
||||
Now also remove XNNPACK deps:
|
||||
|
||||
sed -i 's/\"@org_tensorflow\/\/tensorflow\/lite\/delegates\/xnnpack/#\"@org_tensorflow\/\/tensorflow\/lite\/delegates\/xnnpack/g' /mediapipe/mediapipe/calculators/tflite/BUILD
|
||||
|
||||
#### Now try cross-compiling for device
|
||||
|
||||
* Object detection demo
|
||||
|
||||
![Object Detection running on Coral](./images/object_detection_demo_coral.jpg)
|
||||
|
||||
bazel build -c opt --crosstool_top=@crosstool//:toolchains --compiler=gcc --cpu=aarch64 --define MEDIAPIPE_DISABLE_GPU=1 --copt -DMEDIAPIPE_EDGE_TPU --copt=-flax-vector-conversions mediapipe/examples/coral:object_detection_tpu
|
||||
|
||||
Copy object_detection_tpu binary to the MediaPipe checkout on the coral device
|
||||
|
||||
# outside docker env, open new terminal on host machine #
|
||||
docker ps
|
||||
docker cp <container-id>:/mediapipe/bazel-bin/mediapipe/examples/coral/object_detection_tpu /tmp/.
|
||||
mdt push /tmp/object_detection_tpu /home/mendel/mediapipe/bazel-bin/.
|
||||
|
||||
* Face detection demo
|
||||
|
||||
![Face Detection running on Coral](./images/face_detection_demo_coral.gif)
|
||||
|
||||
bazel build -c opt --crosstool_top=@crosstool//:toolchains --compiler=gcc --cpu=aarch64 --define MEDIAPIPE_DISABLE_GPU=1 --copt -DMEDIAPIPE_EDGE_TPU --copt=-flax-vector-conversions mediapipe/examples/coral:face_detection_tpu
|
||||
|
||||
Copy face_detection_tpu binary to the MediaPipe checkout on the coral device
|
||||
|
||||
# outside docker env, open new terminal on host machine #
|
||||
docker ps
|
||||
docker cp <container-id>:/mediapipe/bazel-bin/mediapipe/examples/coral/face_detection_tpu /tmp/.
|
||||
mdt push /tmp/face_detection_tpu /home/mendel/mediapipe/bazel-bin/.
|
||||
|
||||
## On the coral device (with display)
|
||||
|
||||
# Object detection
|
||||
cd ~/mediapipe
|
||||
chmod +x bazel-bin/object_detection_tpu
|
||||
export GLOG_logtostderr=1
|
||||
bazel-bin/object_detection_tpu --calculator_graph_config_file=mediapipe/examples/coral/graphs/object_detection_desktop_live.pbtxt
|
||||
|
||||
# Face detection
|
||||
cd ~/mediapipe
|
||||
chmod +x bazel-bin/face_detection_tpu
|
||||
export GLOG_logtostderr=1
|
||||
bazel-bin/face_detection_tpu --calculator_graph_config_file=mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
|
||||
```bash
|
||||
# Face Detection
|
||||
GLOG_logtostderr=1 ./face_detection_tpu --calculator_graph_config_file \
|
||||
mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
|
||||
|
||||
# Object Detection
|
||||
GLOG_logtostderr=1 ./object_detection_tpu --calculator_graph_config_file \
|
||||
mediapipe/examples/coral/graphs/object_detection_desktop_live.pbtxt
|
||||
```
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
|
||||
### Coral additions to MediaPipe WORKSPACE ###
|
||||
|
||||
#COMMIT=$(git ls-remote https://github.com/google-coral/crosstool master | awk '{print $1}')
|
||||
#SHA256=$(curl -L "https://github.com/google-coral/crosstool/archive/${COMMIT}.tar.gz" | sha256sum | awk '{print $1}')
|
||||
# Oct 2019
|
||||
#COMMIT=9e00d5be43bf001f883b5700f5d04882fea00229
|
||||
#SHA256=cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb
|
||||
http_archive(
|
||||
name = "coral_crosstool",
|
||||
sha256 = "cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb",
|
||||
strip_prefix = "crosstool-9e00d5be43bf001f883b5700f5d04882fea00229",
|
||||
urls = [
|
||||
"https://github.com/google-coral/crosstool/archive/9e00d5be43bf001f883b5700f5d04882fea00229.tar.gz",
|
||||
],
|
||||
)
|
||||
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
|
||||
cc_crosstool(name = "crosstool")
|
||||
|
||||
# EdgeTPU
|
||||
new_local_repository(
|
||||
name = "edgetpu",
|
||||
path = "/edgetpu/libedgetpu",
|
||||
build_file = "/edgetpu/libedgetpu/BUILD"
|
||||
)
|
||||
new_local_repository(
|
||||
name = "libedgetpu",
|
||||
path = "/usr/lib/aarch64-linux-gnu",
|
||||
build_file = "/edgetpu/libedgetpu/BUILD"
|
||||
)
|
|
@ -74,43 +74,12 @@ node {
|
|||
}
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
options: {
|
||||
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
|
||||
num_layers: 6
|
||||
min_scale: 0.2
|
||||
max_scale: 0.95
|
||||
input_size_height: 300
|
||||
input_size_width: 300
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 16
|
||||
strides: 32
|
||||
strides: 64
|
||||
strides: 128
|
||||
strides: 256
|
||||
strides: 512
|
||||
aspect_ratios: 1.0
|
||||
aspect_ratios: 2.0
|
||||
aspect_ratios: 0.5
|
||||
aspect_ratios: 3.0
|
||||
aspect_ratios: 0.3333
|
||||
reduce_boxes_in_lowest_layer: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TfLiteTensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
options: {
|
||||
[mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
set -v
|
||||
|
||||
echo 'Please run this from root level mediapipe directory! \n Ex:'
|
||||
echo ' sh mediapipe/examples/coral/setup.sh '
|
||||
|
||||
sleep 3
|
||||
|
||||
mkdir -p opencv32_arm64_libs
|
||||
|
||||
# prepare docker aux script
|
||||
cp mediapipe/examples/coral/update_sources.sh update_sources.sh
|
||||
chmod +x update_sources.sh
|
||||
|
||||
# backup non-coral Dockerfile
|
||||
mv Dockerfile Dockerfile.orig
|
||||
cp mediapipe/examples/coral/Dockerfile Dockerfile
|
||||
|
||||
# backup non-coral workspace
|
||||
cp WORKSPACE WORKSPACE.orig
|
||||
|
||||
# create temps
|
||||
cp WORKSPACE WORKSPACE.1
|
||||
cp mediapipe/examples/coral/WORKSPACE.coral WORKSPACE.2
|
||||
|
||||
# merge (shell decides concat order, unless numbered appropriately)
|
||||
cat WORKSPACE.1 WORKSPACE.2 > WORKSPACE
|
||||
|
||||
# cleanup
|
||||
rm WORKSPACE.1 WORKSPACE.2
|
||||
|
||||
echo 'done'
|
|
@ -1,11 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# To run in the Coral Docker environment.
|
||||
|
||||
. /etc/os-release
|
||||
|
||||
sed -i "s/deb\ /deb \[arch=amd64\]\ /g" /etc/apt/sources.list
|
||||
|
||||
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME} main universe" >> /etc/apt/sources.list
|
||||
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME}-updates main universe" >> /etc/apt/sources.list
|
||||
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME}-security main universe" >> /etc/apt/sources.list
|
|
@ -17,8 +17,10 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library"
|
|||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = [
|
||||
"//buzz/diffractor/mediapipe:__subpackages__",
|
||||
"//mediapipe/examples:__subpackages__",
|
||||
"//mediapipe/viz:__subpackages__",
|
||||
"//mediapipe/web/solutions:__subpackages__",
|
||||
])
|
||||
|
||||
cc_library(
|
||||
|
|
|
@ -43,6 +43,9 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kDetectedBordersTag[] = "DETECTED_BORDERS";
|
||||
constexpr char kVideoTag[] = "VIDEO";
|
||||
|
||||
const char kConfig[] = R"(
|
||||
calculator: "BorderDetectionCalculator"
|
||||
input_stream: "VIDEO:camera_frames"
|
||||
|
@ -81,14 +84,14 @@ TEST(BorderDetectionCalculatorTest, NoBorderTest) {
|
|||
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
|
||||
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
|
||||
input_mat.setTo(cv::Scalar(0, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(0, static_features.border().size());
|
||||
|
@ -115,14 +118,14 @@ TEST(BorderDetectionCalculatorTest, TopBorderTest) {
|
|||
cv::Mat sub_image =
|
||||
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
|
||||
sub_image.setTo(cv::Scalar(255, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(1, static_features.border().size());
|
||||
|
@ -155,14 +158,14 @@ TEST(BorderDetectionCalculatorTest, TopBorderPadTest) {
|
|||
cv::Mat sub_image =
|
||||
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
|
||||
sub_image.setTo(cv::Scalar(255, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(1, static_features.border().size());
|
||||
|
@ -197,14 +200,14 @@ TEST(BorderDetectionCalculatorTest, BottomBorderTest) {
|
|||
input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
|
||||
kTestFrameWidth, kBottomBorderHeight));
|
||||
bottom_image.setTo(cv::Scalar(255, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(1, static_features.border().size());
|
||||
|
@ -238,14 +241,14 @@ TEST(BorderDetectionCalculatorTest, TopBottomBorderTest) {
|
|||
input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
|
||||
kTestFrameWidth, kBottomBorderHeight));
|
||||
bottom_image.setTo(cv::Scalar(255, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(2, static_features.border().size());
|
||||
|
@ -291,14 +294,14 @@ TEST(BorderDetectionCalculatorTest, TopBottomBorderTestAspect2) {
|
|||
input_mat(cv::Rect(0, kTestFrameHeightTall - kBottomBorderHeight,
|
||||
kTestFrameWidthTall, kBottomBorderHeight));
|
||||
bottom_image.setTo(cv::Scalar(255, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(2, static_features.border().size());
|
||||
|
@ -352,14 +355,14 @@ TEST(BorderDetectionCalculatorTest, DominantColor) {
|
|||
input_mat(cv::Rect(0, 0, kTestFrameWidth / 2 + 50, kTestFrameHeight / 2));
|
||||
sub_image.setTo(cv::Scalar(255, 0, 0));
|
||||
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("DETECTED_BORDERS").packets;
|
||||
runner->Outputs().Tag(kDetectedBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
ASSERT_EQ(0, static_features.border().size());
|
||||
|
@ -383,7 +386,7 @@ void BM_Large(benchmark::State& state) {
|
|||
cv::Mat sub_image =
|
||||
input_mat(cv::Rect(0, 0, kTestFrameLargeWidth, kTopBorderHeight));
|
||||
sub_image.setTo(cv::Scalar(255, 0, 0));
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
|
||||
// Run the calculator.
|
||||
|
|
|
@ -31,7 +31,11 @@ constexpr char kVideoSize[] = "VIDEO_SIZE";
|
|||
constexpr char kSalientRegions[] = "SALIENT_REGIONS";
|
||||
constexpr char kDetections[] = "DETECTIONS";
|
||||
constexpr char kDetectedBorders[] = "BORDERS";
|
||||
// Crop location as abs rect discretized.
|
||||
constexpr char kCropRect[] = "CROP_RECT";
|
||||
// Crop location as normalized rect.
|
||||
constexpr char kNormalizedCropRect[] = "NORMALIZED_CROP_RECT";
|
||||
// Crop location without position smoothing.
|
||||
constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
|
||||
// Can be used to control whether an animated zoom should actually be performed
|
||||
// (configured through option us_to_first_rect). If provided, a non-zero integer
|
||||
|
@ -51,6 +55,8 @@ constexpr float kFieldOfView = 60;
|
|||
// Used to save state on Close and load state on Open in a new graph.
|
||||
// Can be used to preserve state between graphs.
|
||||
constexpr char kStateCache[] = "STATE_CACHE";
|
||||
// Tolerance for zooming out recentering.
|
||||
constexpr float kPixelTolerance = 3;
|
||||
|
||||
namespace mediapipe {
|
||||
namespace autoflip {
|
||||
|
@ -166,6 +172,9 @@ absl::Status ContentZoomingCalculator::GetContract(
|
|||
if (cc->Outputs().HasTag(kCropRect)) {
|
||||
cc->Outputs().Tag(kCropRect).Set<mediapipe::Rect>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
|
||||
cc->Outputs().Tag(kNormalizedCropRect).Set<mediapipe::NormalizedRect>();
|
||||
}
|
||||
if (cc->Outputs().HasTag(kFirstCropRect)) {
|
||||
cc->Outputs().Tag(kFirstCropRect).Set<mediapipe::NormalizedRect>();
|
||||
}
|
||||
|
@ -553,6 +562,16 @@ absl::Status ContentZoomingCalculator::Process(
|
|||
cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
|
||||
Timestamp(cc->InputTimestamp()));
|
||||
}
|
||||
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
|
||||
auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
|
||||
default_rect->set_x_center(0.5);
|
||||
default_rect->set_y_center(0.5);
|
||||
default_rect->set_width(1.0);
|
||||
default_rect->set_height(1.0);
|
||||
cc->Outputs()
|
||||
.Tag(kNormalizedCropRect)
|
||||
.Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
|
||||
}
|
||||
// Also provide a first crop rect: in this case a zero-sized one.
|
||||
if (cc->Outputs().HasTag(kFirstCropRect)) {
|
||||
cc->Outputs()
|
||||
|
@ -634,9 +653,9 @@ absl::Status ContentZoomingCalculator::Process(
|
|||
// Compute smoothed zoom camera path.
|
||||
MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation(
|
||||
height, cc->InputTimestamp().Microseconds()));
|
||||
int path_height;
|
||||
float path_height;
|
||||
MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height));
|
||||
int path_width = path_height * target_aspect_;
|
||||
float path_width = path_height * target_aspect_;
|
||||
|
||||
// Update pixel-per-degree value for pan/tilt.
|
||||
int target_height;
|
||||
|
@ -652,11 +671,48 @@ absl::Status ContentZoomingCalculator::Process(
|
|||
offset_x, cc->InputTimestamp().Microseconds()));
|
||||
MP_RETURN_IF_ERROR(path_solver_tilt_->AddObservation(
|
||||
offset_y, cc->InputTimestamp().Microseconds()));
|
||||
int path_offset_x;
|
||||
float path_offset_x;
|
||||
MP_RETURN_IF_ERROR(path_solver_pan_->GetState(&path_offset_x));
|
||||
int path_offset_y;
|
||||
float path_offset_y;
|
||||
MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y));
|
||||
|
||||
float delta_height;
|
||||
MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
|
||||
int delta_width = delta_height * target_aspect_;
|
||||
|
||||
// Smooth centering when zooming out.
|
||||
float remaining_width = target_width - path_width;
|
||||
int width_space = frame_width_ - target_width;
|
||||
if (abs(path_offset_x - frame_width_ / 2) >
|
||||
width_space / 2 + kPixelTolerance &&
|
||||
remaining_width > kPixelTolerance) {
|
||||
float required_width =
|
||||
abs(path_offset_x - frame_width_ / 2) - width_space / 2;
|
||||
if (path_offset_x < frame_width_ / 2) {
|
||||
path_offset_x += delta_width * (required_width / remaining_width);
|
||||
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
|
||||
} else {
|
||||
path_offset_x -= delta_width * (required_width / remaining_width);
|
||||
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
|
||||
}
|
||||
}
|
||||
|
||||
float remaining_height = target_height - path_height;
|
||||
int height_space = frame_height_ - target_height;
|
||||
if (abs(path_offset_y - frame_height_ / 2) >
|
||||
height_space / 2 + kPixelTolerance &&
|
||||
remaining_height > kPixelTolerance) {
|
||||
float required_height =
|
||||
abs(path_offset_y - frame_height_ / 2) - height_space / 2;
|
||||
if (path_offset_y < frame_height_ / 2) {
|
||||
path_offset_y += delta_height * (required_height / remaining_height);
|
||||
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
|
||||
} else {
|
||||
path_offset_y -= delta_height * (required_height / remaining_height);
|
||||
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
|
||||
}
|
||||
}
|
||||
|
||||
// Prevent box from extending beyond the image after camera smoothing.
|
||||
if (path_offset_y - ceil(path_height / 2.0) < 0) {
|
||||
path_offset_y = ceil(path_height / 2.0);
|
||||
|
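For intuition, the recentering step above shifts the crop center toward the frame center in proportion to how much zoom-out remains, so the center becomes valid exactly when the crop reaches its target width. A small worked example with assumed values (numbers are purely illustrative, not from the source):

$$
\begin{aligned}
&\texttt{frame\_width} = 1000,\quad \texttt{target\_width} = 500,\quad \texttt{path\_width} = 300,\quad
\texttt{path\_offset\_x} = 200,\quad \texttt{delta\_width} = 20,\\
&\texttt{width\_space}/2 = (1000 - 500)/2 = 250,\qquad |200 - 500| = 300 > 250 + k_{\text{PixelTolerance}},\\
&\texttt{required\_width} = 300 - 250 = 50,\qquad \texttt{remaining\_width} = 500 - 300 = 200,\\
&\texttt{path\_offset\_x} \mathrel{+}= \texttt{delta\_width}\cdot\frac{\texttt{required\_width}}{\texttt{remaining\_width}}
 = 20\cdot\frac{50}{200} = 5 \;\;\Rightarrow\;\; 200 \to 205.
\end{aligned}
$$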
@ -705,7 +761,7 @@ absl::Status ContentZoomingCalculator::Process(
|
|||
is_animating = IsAnimatingToFirstRect(cc->InputTimestamp());
|
||||
}
|
||||
|
||||
// Transmit downstream to glcroppingcalculator.
|
||||
// Transmit downstream to glcroppingcalculator in discrete int values.
|
||||
if (cc->Outputs().HasTag(kCropRect)) {
|
||||
std::unique_ptr<mediapipe::Rect> gpu_rect;
|
||||
if (is_animating) {
|
||||
|
@ -716,13 +772,36 @@ absl::Status ContentZoomingCalculator::Process(
|
|||
} else {
|
||||
gpu_rect = absl::make_unique<mediapipe::Rect>();
|
||||
gpu_rect->set_x_center(path_offset_x);
|
||||
gpu_rect->set_width(path_height * target_aspect_);
|
||||
gpu_rect->set_width(path_width);
|
||||
gpu_rect->set_y_center(path_offset_y);
|
||||
gpu_rect->set_height(path_height);
|
||||
}
|
||||
cc->Outputs().Tag(kCropRect).Add(gpu_rect.release(),
|
||||
Timestamp(cc->InputTimestamp()));
|
||||
}
|
||||
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
|
||||
std::unique_ptr<mediapipe::NormalizedRect> gpu_rect =
|
||||
absl::make_unique<mediapipe::NormalizedRect>();
|
||||
float float_frame_width = static_cast<float>(frame_width_);
|
||||
float float_frame_height = static_cast<float>(frame_height_);
|
||||
if (is_animating) {
|
||||
auto rect =
|
||||
GetAnimationRect(frame_width, frame_height, cc->InputTimestamp());
|
||||
MP_RETURN_IF_ERROR(rect.status());
|
||||
gpu_rect->set_x_center(rect->x_center() / float_frame_width);
|
||||
gpu_rect->set_width(rect->width() / float_frame_width);
|
||||
gpu_rect->set_y_center(rect->y_center() / float_frame_height);
|
||||
gpu_rect->set_height(rect->height() / float_frame_height);
|
||||
} else {
|
||||
gpu_rect->set_x_center(path_offset_x / float_frame_width);
|
||||
gpu_rect->set_width(path_width / float_frame_width);
|
||||
gpu_rect->set_y_center(path_offset_y / float_frame_height);
|
||||
gpu_rect->set_height(path_height / float_frame_height);
|
||||
}
|
||||
cc->Outputs()
|
||||
.Tag(kNormalizedCropRect)
|
||||
.Add(gpu_rect.release(), Timestamp(cc->InputTimestamp()));
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag(kFirstCropRect)) {
|
||||
cc->Outputs()
|
||||
|
|
|
@ -38,6 +38,17 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kFirstCropRectTag[] = "FIRST_CROP_RECT";
|
||||
constexpr char kStateCacheTag[] = "STATE_CACHE";
|
||||
constexpr char kCropRectTag[] = "CROP_RECT";
|
||||
constexpr char kBordersTag[] = "BORDERS";
|
||||
constexpr char kSalientRegionsTag[] = "SALIENT_REGIONS";
|
||||
constexpr char kVideoTag[] = "VIDEO";
|
||||
constexpr char kMaxZoomFactorPctTag[] = "MAX_ZOOM_FACTOR_PCT";
|
||||
constexpr char kAnimateZoomTag[] = "ANIMATE_ZOOM";
|
||||
constexpr char kVideoSizeTag[] = "VIDEO_SIZE";
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
|
||||
const char kConfigA[] = R"(
|
||||
calculator: "ContentZoomingCalculator"
|
||||
input_stream: "VIDEO:camera_frames"
|
||||
|
@ -48,12 +59,15 @@ const char kConfigA[] = R"(
|
|||
max_zoom_value_deg: 0
|
||||
kinematic_options_zoom {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_tilt {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_pan {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -73,12 +87,15 @@ const char kConfigB[] = R"(
|
|||
max_zoom_value_deg: 0
|
||||
kinematic_options_zoom {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_tilt {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_pan {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -94,12 +111,15 @@ const char kConfigC[] = R"(
|
|||
max_zoom_value_deg: 0
|
||||
kinematic_options_zoom {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_tilt {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_pan {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -111,17 +131,21 @@ const char kConfigD[] = R"(
|
|||
input_stream: "DETECTIONS:detections"
|
||||
output_stream: "CROP_RECT:rect"
|
||||
output_stream: "FIRST_CROP_RECT:first_rect"
|
||||
output_stream: "NORMALIZED_CROP_RECT:float_rect"
|
||||
options: {
|
||||
[mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: {
|
||||
max_zoom_value_deg: 0
|
||||
kinematic_options_zoom {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_tilt {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_pan {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -139,12 +163,15 @@ const char kConfigE[] = R"(
|
|||
max_zoom_value_deg: 0
|
||||
kinematic_options_zoom {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_tilt {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_pan {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -162,12 +189,15 @@ const char kConfigF[] = R"(
|
|||
max_zoom_value_deg: 0
|
||||
kinematic_options_zoom {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_tilt {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
kinematic_options_pan {
|
||||
min_motion_to_reframe: 1.2
|
||||
max_velocity: 18
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -220,17 +250,17 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
|
|||
detections->push_back(detection);
|
||||
}
|
||||
runner->MutableInputs()
|
||||
->Tag("DETECTIONS")
|
||||
->Tag(kDetectionsTag)
|
||||
.packets.push_back(Adopt(detections.release()).At(Timestamp(time)));
|
||||
|
||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(width, height);
|
||||
runner->MutableInputs()
|
||||
->Tag("VIDEO_SIZE")
|
||||
->Tag(kVideoSizeTag)
|
||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(time)));
|
||||
|
||||
if (flags.animated_zoom.has_value()) {
|
||||
runner->MutableInputs()
|
||||
->Tag("ANIMATE_ZOOM")
|
||||
->Tag(kAnimateZoomTag)
|
||||
.packets.push_back(
|
||||
mediapipe::MakePacket<bool>(flags.animated_zoom.value())
|
||||
.At(Timestamp(time)));
|
||||
|
@ -238,7 +268,7 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
|
|||
|
||||
if (flags.max_zoom_factor_percent.has_value()) {
|
||||
runner->MutableInputs()
|
||||
->Tag("MAX_ZOOM_FACTOR_PCT")
|
||||
->Tag(kMaxZoomFactorPctTag)
|
||||
.packets.push_back(
|
||||
mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value())
|
||||
.At(Timestamp(time)));
|
||||
|
@ -250,6 +280,21 @@ void AddDetection(const cv::Rect_<float>& position, const int64 time,
|
|||
AddDetectionFrameSize(position, time, 1000, 1000, runner);
|
||||
}
|
||||
|
||||
void CheckCropRectFloats(const float x_center, const float y_center,
|
||||
const float width, const float height,
|
||||
const int frame_number,
|
||||
const CalculatorRunner::StreamContentsSet& output) {
|
||||
ASSERT_GT(output.Tag("NORMALIZED_CROP_RECT").packets.size(), frame_number);
|
||||
auto float_rect = output.Tag("NORMALIZED_CROP_RECT")
|
||||
.packets[frame_number]
|
||||
.Get<mediapipe::NormalizedRect>();
|
||||
|
||||
EXPECT_FLOAT_EQ(float_rect.x_center(), x_center);
|
||||
EXPECT_FLOAT_EQ(float_rect.y_center(), y_center);
|
||||
EXPECT_FLOAT_EQ(float_rect.width(), width);
|
||||
EXPECT_FLOAT_EQ(float_rect.height(), height);
|
||||
}
|
||||
|
||||
void CheckCropRect(const int x_center, const int y_center, const int width,
|
||||
const int height, const int frame_number,
|
||||
const std::vector<Packet>& output_packets) {
|
||||
|
@ -274,21 +319,21 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) {
|
|||
|
||||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||
|
||||
runner->MutableInputs()
|
||||
->Tag("SALIENT_REGIONS")
|
||||
->Tag(kSalientRegionsTag)
|
||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("BORDERS").packets;
|
||||
runner->Outputs().Tag(kBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
CheckBorder(static_features, 1000, 1000, 495, 395);
|
||||
CheckBorder(static_features, 1000, 1000, 494, 394);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
|
||||
|
@ -297,7 +342,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
|
|||
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, PanConfig) {
|
||||
|
@ -313,9 +358,9 @@ TEST(ContentZoomingCalculatorTest, PanConfig) {
|
|||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(483, 550, 111, 111, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, PanConfigWithCache) {
|
||||
|
@ -330,31 +375,31 @@ TEST(ContentZoomingCalculatorTest, PanConfigWithCache) {
|
|||
options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(50.0);
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(483, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
// Now repeat the last frame for a new runner without the cache to see a reset
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
|
||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost.
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -371,9 +416,9 @@ TEST(ContentZoomingCalculatorTest, TiltConfig) {
|
|||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(450, 583, 111, 111, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ZoomConfig) {
|
||||
|
@ -389,9 +434,9 @@ TEST(ContentZoomingCalculatorTest, ZoomConfig) {
|
|||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
CheckCropRect(450, 550, 139, 139, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(450, 550, 138, 138, 1,
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
|
||||
|
@ -406,31 +451,31 @@ TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
|
|||
options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2);
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(450, 550, 139, 139, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
CheckCropRect(450, 550, 138, 138, 0,
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
// Now repeat the last frame for a new runner without the cache to see a reset
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
|
||||
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost.
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -448,18 +493,18 @@ TEST(ContentZoomingCalculatorTest, MinAspectBorderValues) {
|
|||
|
||||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||
|
||||
runner->MutableInputs()
|
||||
->Tag("SALIENT_REGIONS")
|
||||
->Tag(kSalientRegionsTag)
|
||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("BORDERS").packets;
|
||||
runner->Outputs().Tag(kBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
CheckBorder(static_features, 1000, 1000, 250, 250);
|
||||
|
@ -485,18 +530,18 @@ TEST(ContentZoomingCalculatorTest, TwoFacesWide) {
|
|||
|
||||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||
|
||||
runner->MutableInputs()
|
||||
->Tag("SALIENT_REGIONS")
|
||||
->Tag(kSalientRegionsTag)
|
||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("BORDERS").packets;
|
||||
runner->Outputs().Tag(kBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
|
||||
|
@ -510,18 +555,18 @@ TEST(ContentZoomingCalculatorTest, NoDetectionOnInit) {
|
|||
|
||||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp(0)));
|
||||
|
||||
runner->MutableInputs()
|
||||
->Tag("SALIENT_REGIONS")
|
||||
->Tag(kSalientRegionsTag)
|
||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("BORDERS").packets;
|
||||
runner->Outputs().Tag(kBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
|
||||
|
@ -542,21 +587,21 @@ TEST(ContentZoomingCalculatorTest, ZoomTestPairSize) {
|
|||
|
||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
||||
runner->MutableInputs()
|
||||
->Tag("VIDEO_SIZE")
|
||||
->Tag(kVideoSizeTag)
|
||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
||||
|
||||
runner->MutableInputs()
|
||||
->Tag("SALIENT_REGIONS")
|
||||
->Tag(kSalientRegionsTag)
|
||||
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
|
||||
|
||||
// Run the calculator.
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("BORDERS").packets;
|
||||
runner->Outputs().Tag(kBordersTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& static_features = output_packets[0].Get<StaticFeatures>();
|
||||
CheckBorder(static_features, 1000, 1000, 495, 395);
|
||||
CheckBorder(static_features, 1000, 1000, 494, 394);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
|
||||
|
@ -571,9 +616,9 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
|
|||
AddDetection(cv::Rect_<float>(.9, .9, .1, .1), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(972, 972, 55, 55, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
CheckCropRect(958, 958, 83, 83, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(944, 944, 83, 83, 1,
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
|
||||
|
@ -587,8 +632,8 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
|
|||
AddDetection(cv::Rect_<float>(0, 0, .05, .05), 0, runner.get());
|
||||
AddDetection(cv::Rect_<float>(0, 0, .1, .1), 1000000, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag("CROP_RECT").packets);
|
||||
CheckCropRect(42, 42, 83, 83, 1, runner->Outputs().Tag("CROP_RECT").packets);
|
||||
CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(56, 56, 83, 83, 1, runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, VerticalShift) {
|
||||
|
@ -601,7 +646,9 @@ TEST(ContentZoomingCalculatorTest, VerticalShift) {
|
|||
MP_ASSERT_OK(runner->Run());
|
||||
// 1000px * .1 offset + 1000*.1*.1 shift = 170
|
||||
CheckCropRect(150, 170, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRectFloats(150 / 1000.0, 170 / 1000.0, 111 / 1000.0, 111 / 1000.0, 0,
|
||||
runner->Outputs());
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, HorizontalShift) {
|
||||
|
@ -614,7 +661,9 @@ TEST(ContentZoomingCalculatorTest, HorizontalShift) {
|
|||
MP_ASSERT_OK(runner->Run());
|
||||
// 1000px * .1 offset + 1000*.1*.1 shift = 170
|
||||
CheckCropRect(170, 150, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRectFloats(170 / 1000.0, 150 / 1000.0, 111 / 1000.0, 111 / 1000.0, 0,
|
||||
runner->Outputs());
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
|
||||
|
@ -627,14 +676,14 @@ TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
|
|||
AddDetection(cv::Rect_<float>(.9, 0, .1, .1), 0, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(944, 56, 111, 111, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, EmptySize) {
|
||||
auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
ASSERT_EQ(runner->Outputs().Tag("CROP_RECT").packets.size(), 0);
|
||||
ASSERT_EQ(runner->Outputs().Tag(kCropRectTag).packets.size(), 0);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, EmptyDetections) {
|
||||
|
@ -642,11 +691,11 @@ TEST(ContentZoomingCalculatorTest, EmptyDetections) {
|
|||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
||||
runner->MutableInputs()
|
||||
->Tag("VIDEO_SIZE")
|
||||
->Tag(kVideoSizeTag)
|
||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
|
||||
|
@ -658,9 +707,9 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
|
|||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 222, 222, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
|
||||
|
@ -669,23 +718,23 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
|
|||
config.add_input_side_packet("STATE_CACHE:state_cache");
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 1000, 1000,
|
||||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 222, 222, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1, 500, 500,
|
||||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -700,11 +749,11 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZooming) {
|
|||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 888, 888, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 588, 588, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
||||
|
@ -713,18 +762,18 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
|||
config.add_input_side_packet("STATE_CACHE:state_cache");
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetectionFrameSize(cv::Rect_<float>(.1, .1, .8, .8), 0, 1000, 1000,
|
||||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 888, 888, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
// The second runner should just resume based on state from the first runner.
|
||||
{
|
||||
auto runner = ::absl::make_unique<CalculatorRunner>(config);
|
||||
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
|
||||
runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
|
||||
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
|
||||
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
|
||||
runner.get());
|
||||
|
@ -732,9 +781,9 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
|
|||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 588, 588, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -749,7 +798,7 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) {
|
|||
MP_ASSERT_OK(runner->Run());
|
||||
// 55/60 * 1000 = 916
|
||||
CheckCropRect(500, 500, 916, 916, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
|
||||
|
@ -772,11 +821,11 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
|
|||
// Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360
|
||||
// Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430
|
||||
CheckCropRect(320, 240, 480, 360, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(640, 360, 769, 433, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(320, 240, 480, 360, 3,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
|
||||
|
@ -795,9 +844,9 @@ TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
|
|||
MP_ASSERT_OK(runner->Run());
|
||||
// 55/60 * 1000 = 916
|
||||
CheckCropRect(500, 500, 950, 950, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 1000, 1000, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
|
||||
|
@ -816,13 +865,13 @@ TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
|
|||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 880, 880, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 760, 760, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 655, 655, 3,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
|
||||
|
@ -844,15 +893,15 @@ TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
|
|||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 1000, 1000, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 470, 470, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 222, 222, 3,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 222, 222, 4,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
|
||||
|
@ -879,15 +928,15 @@ TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
|
|||
runner.get(), {.animated_zoom = false});
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 1000, 1000, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 470, 470, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 222, 222, 3,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 222, 222, 4,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
|
||||
|
@ -907,11 +956,11 @@ TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
|
|||
runner.get(), {.animated_zoom = false});
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckCropRect(500, 500, 1000, 1000, 0,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 880, 880, 1,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
CheckCropRect(500, 500, 760, 760, 2,
|
||||
runner->Outputs().Tag("CROP_RECT").packets);
|
||||
runner->Outputs().Tag(kCropRectTag).packets);
|
||||
}
|
||||
|
||||
TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
|
||||
|
@ -920,13 +969,13 @@ TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
|
|||
|
||||
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
|
||||
runner->MutableInputs()
|
||||
->Tag("VIDEO_SIZE")
|
||||
->Tag(kVideoSizeTag)
|
||||
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
|
||||
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("FIRST_CROP_RECT").packets;
|
||||
runner->Outputs().Tag(kFirstCropRectTag).packets;
|
||||
ASSERT_EQ(output_packets.size(), 1);
|
||||
const auto& rect = output_packets[0].Get<mediapipe::NormalizedRect>();
|
||||
EXPECT_EQ(rect.x_center(), 0);
|
||||
|
@ -951,7 +1000,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) {
|
|||
runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("FIRST_CROP_RECT").packets;
|
||||
runner->Outputs().Tag(kFirstCropRectTag).packets;
|
||||
ASSERT_EQ(output_packets.size(), 4);
|
||||
const auto& first_rect = output_packets[0].Get<mediapipe::NormalizedRect>();
|
||||
EXPECT_NEAR(first_rect.x_center(), 0.5, 0.05);
|
||||
@@ -64,7 +64,7 @@ message FaceBoxAdjusterCalculatorOptions {

   // Max value of head motion (max of current or history) to be considered still
   // stable.
-  optional float head_motion_threshold = 14 [default = 10.0];
+  optional float head_motion_threshold = 14 [default = 360.0];

   // The max amount of time to use an old eye distance when the face look angle
   // is unstable.
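For reference, a defaulted proto2 field like this is read through its generated getter; a minimal sketch, assuming the options are pulled from the calculator context as elsewhere in AutoFlip (the motion variable is illustrative):

// Sketch: returns 360.0 unless the graph config sets head_motion_threshold.
const auto& options = cc->Options<FaceBoxAdjusterCalculatorOptions>();
const float threshold = options.head_motion_threshold();
// Hypothetical use: treat the head as stable while recent motion stays below it.
const bool head_is_stable = recent_head_motion_deg <= threshold;
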
@@ -32,6 +32,10 @@
 namespace mediapipe {
 namespace autoflip {

+constexpr char kRegionsTag[] = "REGIONS";
+constexpr char kFacesTag[] = "FACES";
+constexpr char kVideoTag[] = "VIDEO";
+
 // This calculator converts detected faces to SalientRegion protos that can be
 // used for downstream processing. Each SalientRegion is scored using image
 // cues. Scoring can be controlled through
@@ -80,17 +84,17 @@ FaceToRegionCalculator::FaceToRegionCalculator() {}

 absl::Status FaceToRegionCalculator::GetContract(
     mediapipe::CalculatorContract* cc) {
-  if (cc->Inputs().HasTag("VIDEO")) {
-    cc->Inputs().Tag("VIDEO").Set<ImageFrame>();
+  if (cc->Inputs().HasTag(kVideoTag)) {
+    cc->Inputs().Tag(kVideoTag).Set<ImageFrame>();
   }
-  cc->Inputs().Tag("FACES").Set<std::vector<mediapipe::Detection>>();
-  cc->Outputs().Tag("REGIONS").Set<DetectionSet>();
+  cc->Inputs().Tag(kFacesTag).Set<std::vector<mediapipe::Detection>>();
+  cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
   return absl::OkStatus();
 }

 absl::Status FaceToRegionCalculator::Open(mediapipe::CalculatorContext* cc) {
   options_ = cc->Options<FaceToRegionCalculatorOptions>();
-  if (!cc->Inputs().HasTag("VIDEO")) {
+  if (!cc->Inputs().HasTag(kVideoTag)) {
     RET_CHECK(!options_.use_visual_scorer())
         << "VIDEO input must be provided when using visual_scorer.";
     RET_CHECK(!options_.export_individual_face_landmarks())
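A node using this contract wires its streams by the same tags; a sketch of such a config written as a raw-string constant, in the style of the test fixtures (the stream names after the colons are assumptions):

// Sketch of a graph node using the FACES/VIDEO/REGIONS tags declared above.
constexpr char kExampleFaceToRegionNode[] = R"(
  calculator: "FaceToRegionCalculator"
  input_stream: "VIDEO:camera_frames"
  input_stream: "FACES:face_detections"
  output_stream: "REGIONS:salient_regions"
)";
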
@@ -146,24 +150,24 @@ void FaceToRegionCalculator::ExtendSalientRegionWithPoint(
 }

 absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) {
-  if (cc->Inputs().HasTag("VIDEO") &&
-      cc->Inputs().Tag("VIDEO").Value().IsEmpty()) {
+  if (cc->Inputs().HasTag(kVideoTag) &&
+      cc->Inputs().Tag(kVideoTag).Value().IsEmpty()) {
     return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
            << "No VIDEO input at time " << cc->InputTimestamp().Seconds();
   }

   cv::Mat frame;
-  if (cc->Inputs().HasTag("VIDEO")) {
+  if (cc->Inputs().HasTag(kVideoTag)) {
     frame = mediapipe::formats::MatView(
-        &cc->Inputs().Tag("VIDEO").Get<ImageFrame>());
+        &cc->Inputs().Tag(kVideoTag).Get<ImageFrame>());
     frame_width_ = frame.cols;
     frame_height_ = frame.rows;
   }

   auto region_set = ::absl::make_unique<DetectionSet>();
-  if (!cc->Inputs().Tag("FACES").Value().IsEmpty()) {
+  if (!cc->Inputs().Tag(kFacesTag).Value().IsEmpty()) {
     const auto& input_faces =
-        cc->Inputs().Tag("FACES").Get<std::vector<mediapipe::Detection>>();
+        cc->Inputs().Tag(kFacesTag).Get<std::vector<mediapipe::Detection>>();

     for (const auto& input_face : input_faces) {
       RET_CHECK(input_face.location_data().format() ==

@@ -276,7 +280,9 @@ absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) {
       }
     }
   }
-  cc->Outputs().Tag("REGIONS").Add(region_set.release(), cc->InputTimestamp());
+  cc->Outputs()
+      .Tag(kRegionsTag)
+      .Add(region_set.release(), cc->InputTimestamp());

   return absl::OkStatus();
 }
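The HasTag/IsEmpty guard in Process() is how the VIDEO stream is treated as optional; a small hedged sketch of the same check factored into a helper (the helper itself is hypothetical, not part of this calculator):

// Hypothetical helper mirroring the guard above: true when an optional input
// stream is both declared and carries a packet at the current timestamp.
bool HasPacketAtCurrentTimestamp(mediapipe::CalculatorContext* cc,
                                 const std::string& tag) {
  return cc->Inputs().HasTag(tag) && !cc->Inputs().Tag(tag).Value().IsEmpty();
}
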
|
|
|
@ -33,6 +33,10 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kRegionsTag[] = "REGIONS";
|
||||
constexpr char kFacesTag[] = "FACES";
|
||||
constexpr char kVideoTag[] = "VIDEO";
|
||||
|
||||
const char kConfig[] = R"(
|
||||
calculator: "FaceToRegionCalculator"
|
||||
input_stream: "VIDEO:frames"
|
||||
|
@ -100,7 +104,7 @@ void SetInputs(const std::vector<std::string>& faces, const bool include_video,
|
|||
if (include_video) {
|
||||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 800, 600);
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp::PostStream()));
|
||||
}
|
||||
// Setup two faces as input.
|
||||
|
@ -109,7 +113,7 @@ void SetInputs(const std::vector<std::string>& faces, const bool include_video,
|
|||
for (const auto& face : faces) {
|
||||
input_faces->push_back(ParseTextProtoOrDie<Detection>(face));
|
||||
}
|
||||
runner->MutableInputs()->Tag("FACES").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kFacesTag).packets.push_back(
|
||||
Adopt(input_faces.release()).At(Timestamp::PostStream()));
|
||||
}
|
||||
|
||||
|
@ -144,7 +148,7 @@ TEST(FaceToRegionCalculatorTest, FaceFullTypeSize) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
|
@ -177,7 +181,7 @@ TEST(FaceToRegionCalculatorTest, FaceLandmarksTypeSize) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
|
@ -208,7 +212,7 @@ TEST(FaceToRegionCalculatorTest, FaceLandmarksBox) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
|
@ -243,7 +247,7 @@ TEST(FaceToRegionCalculatorTest, FaceScore) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
ASSERT_EQ(1, regions.detections().size());
|
||||
|
@ -292,7 +296,7 @@ TEST(FaceToRegionCalculatorTest, FaceNoVideoPass) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
|
|
|
@ -52,6 +52,9 @@ LocalizationToRegionCalculator::LocalizationToRegionCalculator() {}
|
|||
|
||||
namespace {
|
||||
|
||||
constexpr char kRegionsTag[] = "REGIONS";
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
|
||||
// Converts an object detection to a autoflip SignalType. Returns true if the
|
||||
// std::string label has a autoflip label.
|
||||
bool MatchType(const std::string& label, SignalType* type) {
|
||||
|
@ -86,8 +89,8 @@ void FillSalientRegion(const mediapipe::Detection& detection,
|
|||
|
||||
absl::Status LocalizationToRegionCalculator::GetContract(
|
||||
mediapipe::CalculatorContract* cc) {
|
||||
cc->Inputs().Tag("DETECTIONS").Set<std::vector<mediapipe::Detection>>();
|
||||
cc->Outputs().Tag("REGIONS").Set<DetectionSet>();
|
||||
cc->Inputs().Tag(kDetectionsTag).Set<std::vector<mediapipe::Detection>>();
|
||||
cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -101,7 +104,7 @@ absl::Status LocalizationToRegionCalculator::Open(
|
|||
absl::Status LocalizationToRegionCalculator::Process(
|
||||
mediapipe::CalculatorContext* cc) {
|
||||
const auto& annotations =
|
||||
cc->Inputs().Tag("DETECTIONS").Get<std::vector<mediapipe::Detection>>();
|
||||
cc->Inputs().Tag(kDetectionsTag).Get<std::vector<mediapipe::Detection>>();
|
||||
auto regions = ::absl::make_unique<DetectionSet>();
|
||||
for (const auto& detection : annotations) {
|
||||
RET_CHECK_EQ(detection.label().size(), 1)
|
||||
|
@ -118,7 +121,7 @@ absl::Status LocalizationToRegionCalculator::Process(
|
|||
}
|
||||
}
|
||||
|
||||
cc->Outputs().Tag("REGIONS").Add(regions.release(), cc->InputTimestamp());
|
||||
cc->Outputs().Tag(kRegionsTag).Add(regions.release(), cc->InputTimestamp());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,9 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kRegionsTag[] = "REGIONS";
|
||||
constexpr char kDetectionsTag[] = "DETECTIONS";
|
||||
|
||||
const char kConfig[] = R"(
|
||||
calculator: "LocalizationToRegionCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
|
@ -81,7 +84,7 @@ void SetInputs(CalculatorRunner* runner,
|
|||
inputs->push_back(ParseTextProtoOrDie<Detection>(detection));
|
||||
}
|
||||
runner->MutableInputs()
|
||||
->Tag("DETECTIONS")
|
||||
->Tag(kDetectionsTag)
|
||||
.packets.push_back(Adopt(inputs.release()).At(Timestamp::PostStream()));
|
||||
}
|
||||
|
||||
|
@ -109,7 +112,7 @@ TEST(LocalizationToRegionCalculatorTest, StandardTypes) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
ASSERT_EQ(2, regions.detections().size());
|
||||
|
@ -137,7 +140,7 @@ TEST(LocalizationToRegionCalculatorTest, AllTypes) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
ASSERT_EQ(3, regions.detections().size());
|
||||
|
@ -153,7 +156,7 @@ TEST(LocalizationToRegionCalculatorTest, BothTypes) {
|
|||
|
||||
// Check the output regions.
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("REGIONS").packets;
|
||||
runner->Outputs().Tag(kRegionsTag).packets;
|
||||
ASSERT_EQ(1, output_packets.size());
|
||||
const auto& regions = output_packets[0].Get<DetectionSet>();
|
||||
ASSERT_EQ(5, regions.detections().size());
|
||||
|
|
|
@ -34,6 +34,23 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kFramingDetectionsVizFramesTag[] =
|
||||
"FRAMING_DETECTIONS_VIZ_FRAMES";
|
||||
constexpr char kExternalRenderingFullVidTag[] = "EXTERNAL_RENDERING_FULL_VID";
|
||||
constexpr char kExternalRenderingPerFrameTag[] = "EXTERNAL_RENDERING_PER_FRAME";
|
||||
constexpr char kCroppingSummaryTag[] = "CROPPING_SUMMARY";
|
||||
constexpr char kSalientPointFrameVizFramesTag[] =
|
||||
"SALIENT_POINT_FRAME_VIZ_FRAMES";
|
||||
constexpr char kKeyFrameCropRegionVizFramesTag[] =
|
||||
"KEY_FRAME_CROP_REGION_VIZ_FRAMES";
|
||||
constexpr char kCroppedFramesTag[] = "CROPPED_FRAMES";
|
||||
constexpr char kShotBoundariesTag[] = "SHOT_BOUNDARIES";
|
||||
constexpr char kStaticFeaturesTag[] = "STATIC_FEATURES";
|
||||
constexpr char kVideoSizeTag[] = "VIDEO_SIZE";
|
||||
constexpr char kVideoFramesTag[] = "VIDEO_FRAMES";
|
||||
constexpr char kDetectionFeaturesTag[] = "DETECTION_FEATURES";
|
||||
constexpr char kKeyFramesTag[] = "KEY_FRAMES";
|
||||
|
||||
using ::testing::HasSubstr;
|
||||
|
||||
constexpr char kConfig[] = R"(
|
||||
|
@ -241,10 +258,10 @@ void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
|
|||
const int key_frame_height, bool randomize,
|
||||
CalculatorRunner::StreamContentsSet* inputs) {
|
||||
Timestamp timestamp(time_ms);
|
||||
if (inputs->HasTag("KEY_FRAMES")) {
|
||||
if (inputs->HasTag(kKeyFramesTag)) {
|
||||
auto key_frame = MakeImageFrameFromColor(GetRandomColor(), key_frame_width,
|
||||
key_frame_height);
|
||||
inputs->Tag("KEY_FRAMES")
|
||||
inputs->Tag(kKeyFramesTag)
|
||||
.packets.push_back(Adopt(key_frame.release()).At(timestamp));
|
||||
}
|
||||
if (randomize) {
|
||||
|
@ -252,11 +269,11 @@ void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
|
|||
kMinNumDetections, kMaxNumDetections)(GetGen());
|
||||
auto detections =
|
||||
MakeDetections(num_detections, key_frame_width, key_frame_height);
|
||||
inputs->Tag("DETECTION_FEATURES")
|
||||
inputs->Tag(kDetectionFeaturesTag)
|
||||
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
||||
} else {
|
||||
auto detections = MakeCenterDetection(key_frame_width, key_frame_height);
|
||||
inputs->Tag("DETECTION_FEATURES")
|
||||
inputs->Tag(kDetectionFeaturesTag)
|
||||
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
||||
}
|
||||
}
|
||||
|
@ -272,19 +289,19 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
|
|||
int64 time_ms = start_frame_index * kTimestampDiff;
|
||||
for (int i = 0; i < num_scene_frames; ++i) {
|
||||
Timestamp timestamp(time_ms);
|
||||
if (inputs->HasTag("VIDEO_FRAMES")) {
|
||||
if (inputs->HasTag(kVideoFramesTag)) {
|
||||
auto frame =
|
||||
MakeImageFrameFromColor(GetRandomColor(), frame_width, frame_height);
|
||||
inputs->Tag("VIDEO_FRAMES")
|
||||
inputs->Tag(kVideoFramesTag)
|
||||
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
||||
} else {
|
||||
auto input_size =
|
||||
::absl::make_unique<std::pair<int, int>>(frame_width, frame_height);
|
||||
inputs->Tag("VIDEO_SIZE")
|
||||
inputs->Tag(kVideoSizeTag)
|
||||
.packets.push_back(Adopt(input_size.release()).At(timestamp));
|
||||
}
|
||||
auto static_features = absl::make_unique<StaticFeatures>();
|
||||
inputs->Tag("STATIC_FEATURES")
|
||||
inputs->Tag(kStaticFeaturesTag)
|
||||
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
||||
if (DownSampleRate == 1) {
|
||||
AddKeyFrameFeatures(time_ms, key_frame_width, key_frame_height, false,
|
||||
|
@ -294,7 +311,7 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
|
|||
inputs);
|
||||
}
|
||||
if (i == num_scene_frames - 1) { // adds shot boundary
|
||||
inputs->Tag("SHOT_BOUNDARIES")
|
||||
inputs->Tag(kShotBoundariesTag)
|
||||
.packets.push_back(Adopt(new bool(true)).At(Timestamp(time_ms)));
|
||||
}
|
||||
time_ms += kTimestampDiff;
|
||||
|
@ -306,8 +323,8 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
|
|||
void CheckCroppedFrames(const CalculatorRunner& runner, const int num_frames,
|
||||
const int target_width, const int target_height) {
|
||||
const auto& outputs = runner.Outputs();
|
||||
EXPECT_TRUE(outputs.HasTag("CROPPED_FRAMES"));
|
||||
const auto& cropped_frames_outputs = outputs.Tag("CROPPED_FRAMES").packets;
|
||||
EXPECT_TRUE(outputs.HasTag(kCroppedFramesTag));
|
||||
const auto& cropped_frames_outputs = outputs.Tag(kCroppedFramesTag).packets;
|
||||
EXPECT_EQ(cropped_frames_outputs.size(), num_frames);
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
|
||||
|
@ -392,23 +409,23 @@ TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) {
|
|||
|
||||
MP_EXPECT_OK(runner->Run());
|
||||
const auto& outputs = runner->Outputs();
|
||||
EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES"));
|
||||
EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES"));
|
||||
EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY"));
|
||||
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_PER_FRAME"));
|
||||
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_FULL_VID"));
|
||||
EXPECT_TRUE(outputs.HasTag("FRAMING_DETECTIONS_VIZ_FRAMES"));
|
||||
EXPECT_TRUE(outputs.HasTag(kKeyFrameCropRegionVizFramesTag));
|
||||
EXPECT_TRUE(outputs.HasTag(kSalientPointFrameVizFramesTag));
|
||||
EXPECT_TRUE(outputs.HasTag(kCroppingSummaryTag));
|
||||
EXPECT_TRUE(outputs.HasTag(kExternalRenderingPerFrameTag));
|
||||
EXPECT_TRUE(outputs.HasTag(kExternalRenderingFullVidTag));
|
||||
EXPECT_TRUE(outputs.HasTag(kFramingDetectionsVizFramesTag));
|
||||
const auto& crop_region_viz_frames_outputs =
|
||||
outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets;
|
||||
outputs.Tag(kKeyFrameCropRegionVizFramesTag).packets;
|
||||
const auto& salient_point_viz_frames_outputs =
|
||||
outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets;
|
||||
const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets;
|
||||
outputs.Tag(kSalientPointFrameVizFramesTag).packets;
|
||||
const auto& summary_output = outputs.Tag(kCroppingSummaryTag).packets;
|
||||
const auto& ext_render_per_frame =
|
||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
||||
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||
const auto& ext_render_full_vid =
|
||||
outputs.Tag("EXTERNAL_RENDERING_FULL_VID").packets;
|
||||
outputs.Tag(kExternalRenderingFullVidTag).packets;
|
||||
const auto& framing_viz_frames_output =
|
||||
outputs.Tag("FRAMING_DETECTIONS_VIZ_FRAMES").packets;
|
||||
outputs.Tag(kFramingDetectionsVizFramesTag).packets;
|
||||
EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames);
|
||||
EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
|
||||
EXPECT_EQ(framing_viz_frames_output.size(), num_frames);
|
||||
|
@ -597,7 +614,7 @@ TEST(SceneCroppingCalculatorTest, ProducesEvenFrameSize) {
|
|||
kKeyFrameHeight, kDownSampleRate, runner->MutableInputs());
|
||||
MP_EXPECT_OK(runner->Run());
|
||||
const auto& output_frame = runner->Outputs()
|
||||
.Tag("CROPPED_FRAMES")
|
||||
.Tag(kCroppedFramesTag)
|
||||
.packets[0]
|
||||
.Get<ImageFrame>();
|
||||
EXPECT_EQ(output_frame.Width() % 2, 0);
|
||||
|
@ -646,7 +663,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
|||
Timestamp timestamp(time_ms);
|
||||
auto frame =
|
||||
MakeImageFrameFromColor(GetRandomColor(), input_width, input_height);
|
||||
inputs->Tag("VIDEO_FRAMES")
|
||||
inputs->Tag(kVideoFramesTag)
|
||||
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
||||
if (i % static_features_downsample_rate == 0) {
|
||||
auto static_features = absl::make_unique<StaticFeatures>();
|
||||
|
@ -657,7 +674,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
|||
color->set_g(green);
|
||||
color->set_b(red);
|
||||
}
|
||||
inputs->Tag("STATIC_FEATURES")
|
||||
inputs->Tag(kStaticFeaturesTag)
|
||||
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
||||
num_static_features++;
|
||||
}
|
||||
|
@ -672,7 +689,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
|||
location->set_y(0);
|
||||
location->set_width(80);
|
||||
location->set_height(input_height);
|
||||
inputs->Tag("DETECTION_FEATURES")
|
||||
inputs->Tag(kDetectionFeaturesTag)
|
||||
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
||||
}
|
||||
time_ms += kTimestampDiff;
|
||||
|
@ -683,7 +700,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
|
|||
// Checks that the top and bottom borders indeed have the background color.
|
||||
const int border_size = 37;
|
||||
const auto& cropped_frames_outputs =
|
||||
runner->Outputs().Tag("CROPPED_FRAMES").packets;
|
||||
runner->Outputs().Tag(kCroppedFramesTag).packets;
|
||||
EXPECT_EQ(cropped_frames_outputs.size(), kSceneSize);
|
||||
for (int i = 0; i < kSceneSize; ++i) {
|
||||
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
|
||||
|
@ -727,7 +744,7 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
|
|||
auto mat = formats::MatView(frame.get());
|
||||
mat(top_border_rect) = border_color;
|
||||
mat(bottom_border_rect) = border_color;
|
||||
inputs->Tag("VIDEO_FRAMES")
|
||||
inputs->Tag(kVideoFramesTag)
|
||||
.packets.push_back(Adopt(frame.release()).At(timestamp));
|
||||
// Set borders in static features.
|
||||
auto static_features = absl::make_unique<StaticFeatures>();
|
||||
|
@ -737,11 +754,11 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
|
|||
auto* bottom_part = static_features->add_border();
|
||||
bottom_part->set_relative_position(Border::BOTTOM);
|
||||
bottom_part->mutable_border_position()->set_height(bottom_border_size);
|
||||
inputs->Tag("STATIC_FEATURES")
|
||||
inputs->Tag(kStaticFeaturesTag)
|
||||
.packets.push_back(Adopt(static_features.release()).At(timestamp));
|
||||
// Add empty detections to ensure no padding is used.
|
||||
auto detections = absl::make_unique<DetectionSet>();
|
||||
inputs->Tag("DETECTION_FEATURES")
|
||||
inputs->Tag(kDetectionFeaturesTag)
|
||||
.packets.push_back(Adopt(detections.release()).At(timestamp));
|
||||
|
||||
MP_EXPECT_OK(runner->Run());
|
||||
|
@ -749,7 +766,7 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
|
|||
// Checks that the top and bottom borders are removed. Each frame should have
|
||||
// solid color equal to frame color.
|
||||
const auto& cropped_frames_outputs =
|
||||
runner->Outputs().Tag("CROPPED_FRAMES").packets;
|
||||
runner->Outputs().Tag(kCroppedFramesTag).packets;
|
||||
EXPECT_EQ(cropped_frames_outputs.size(), 1);
|
||||
const auto& cropped_frame = cropped_frames_outputs[0].Get<ImageFrame>();
|
||||
const auto cropped_mat = formats::MatView(&cropped_frame);
|
||||
|
@ -775,7 +792,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessagePolyPath) {
|
|||
MP_EXPECT_OK(runner->Run());
|
||||
const auto& outputs = runner->Outputs();
|
||||
const auto& ext_render_per_frame =
|
||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
||||
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||
|
||||
for (int i = 0; i < num_frames - 1; ++i) {
|
||||
|
@ -813,7 +830,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessageKinematicPath) {
|
|||
MP_EXPECT_OK(runner->Run());
|
||||
const auto& outputs = runner->Outputs();
|
||||
const auto& ext_render_per_frame =
|
||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
||||
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||
|
||||
for (int i = 0; i < num_frames - 1; ++i) {
|
||||
|
@ -846,7 +863,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessagePolyPathNoVideo) {
|
|||
MP_EXPECT_OK(runner->Run());
|
||||
const auto& outputs = runner->Outputs();
|
||||
const auto& ext_render_per_frame =
|
||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
||||
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||
|
||||
for (int i = 0; i < num_frames - 1; ++i) {
|
||||
|
@ -886,7 +903,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessageKinematicPathNoVideo) {
|
|||
MP_EXPECT_OK(runner->Run());
|
||||
const auto& outputs = runner->Outputs();
|
||||
const auto& ext_render_per_frame =
|
||||
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets;
|
||||
outputs.Tag(kExternalRenderingPerFrameTag).packets;
|
||||
EXPECT_EQ(ext_render_per_frame.size(), num_frames);
|
||||
|
||||
for (int i = 0; i < num_frames - 1; ++i) {
|
||||
|
|
|
@ -43,6 +43,9 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kIsShotChangeTag[] = "IS_SHOT_CHANGE";
|
||||
constexpr char kVideoTag[] = "VIDEO";
|
||||
|
||||
const char kConfig[] = R"(
|
||||
calculator: "ShotBoundaryCalculator"
|
||||
input_stream: "VIDEO:camera_frames"
|
||||
|
@ -70,7 +73,7 @@ void AddFrames(const int number_of_frames, const std::set<int>& skip_frames,
|
|||
if (skip_frames.count(i) < 1) {
|
||||
sub_image.copyTo(frame_area);
|
||||
}
|
||||
runner->MutableInputs()->Tag("VIDEO").packets.push_back(
|
||||
runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
|
||||
Adopt(input_frame.release()).At(Timestamp(i * 1000000)));
|
||||
}
|
||||
}
|
||||
|
@ -97,7 +100,7 @@ TEST(ShotBoundaryCalculatorTest, NoShotChange) {
|
|||
|
||||
AddFrames(10, {}, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckOutput(10, {}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
||||
CheckOutput(10, {}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||
}
|
||||
|
||||
TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
|
||||
|
@ -110,7 +113,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
|
|||
|
||||
AddFrames(20, {10}, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckOutput(20, {10}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
||||
CheckOutput(20, {10}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||
}
|
||||
|
||||
TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
|
||||
|
@ -123,7 +126,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
|
|||
|
||||
AddFrames(20, {14, 17}, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckOutput(20, {14, 17}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
||||
CheckOutput(20, {14, 17}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||
}
|
||||
|
||||
TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
|
||||
|
@ -140,7 +143,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
|
|||
|
||||
AddFrames(24, {16, 19}, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
CheckOutput(24, {16}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets);
|
||||
CheckOutput(24, {16}, runner->Outputs().Tag(kIsShotChangeTag).packets);
|
||||
}
|
||||
|
||||
TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
|
||||
|
@ -153,7 +156,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
|
|||
|
||||
AddFrames(20, {15}, runner.get());
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
auto output_packets = runner->Outputs().Tag("IS_SHOT_CHANGE").packets;
|
||||
auto output_packets = runner->Outputs().Tag(kIsShotChangeTag).packets;
|
||||
ASSERT_EQ(output_packets.size(), 1);
|
||||
ASSERT_EQ(output_packets[0].Get<bool>(), true);
|
||||
ASSERT_EQ(output_packets[0].Timestamp().Value(), 15000000);
|
||||
|
|
|
@ -32,6 +32,9 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kOutputTag[] = "OUTPUT";
|
||||
constexpr char kIsShotBoundaryTag[] = "IS_SHOT_BOUNDARY";
|
||||
|
||||
const char kConfigA[] = R"(
|
||||
calculator: "SignalFusingCalculator"
|
||||
input_stream: "scene_change"
|
||||
|
@ -160,7 +163,7 @@ TEST(SignalFusingCalculatorTest, TwoInputShotLabeledTags) {
|
|||
|
||||
auto input_shot = absl::make_unique<bool>(false);
|
||||
runner->MutableInputs()
|
||||
->Tag("IS_SHOT_BOUNDARY")
|
||||
->Tag(kIsShotBoundaryTag)
|
||||
.packets.push_back(Adopt(input_shot.release()).At(Timestamp(0)));
|
||||
|
||||
auto input_face =
|
||||
|
@ -200,7 +203,7 @@ TEST(SignalFusingCalculatorTest, TwoInputShotLabeledTags) {
|
|||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("OUTPUT").packets;
|
||||
runner->Outputs().Tag(kOutputTag).packets;
|
||||
const auto& detection_set = output_packets[0].Get<DetectionSet>();
|
||||
|
||||
ASSERT_EQ(detection_set.detections().size(), 4);
|
||||
|
@ -251,7 +254,7 @@ TEST(SignalFusingCalculatorTest, TwoInputNoShotLabeledTags) {
|
|||
MP_ASSERT_OK(runner->Run());
|
||||
|
||||
const std::vector<Packet>& output_packets =
|
||||
runner->Outputs().Tag("OUTPUT").packets;
|
||||
runner->Outputs().Tag(kOutputTag).packets;
|
||||
const auto& detection_set = output_packets[0].Get<DetectionSet>();
|
||||
|
||||
ASSERT_EQ(detection_set.detections().size(), 4);
|
||||
|
|
|
@ -31,6 +31,9 @@ namespace mediapipe {
|
|||
namespace autoflip {
|
||||
namespace {
|
||||
|
||||
constexpr char kOutputFramesTag[] = "OUTPUT_FRAMES";
|
||||
constexpr char kInputFramesTag[] = "INPUT_FRAMES";
|
||||
|
||||
// Default configuration of the calculator.
|
||||
CalculatorGraphConfig::Node GetCalculatorNode(
|
||||
const std::string& fail_if_any, const std::string& extra_options = "") {
|
||||
|
@ -65,10 +68,10 @@ TEST(VideoFilterCalculatorTest, UpperBoundNoPass) {
|
|||
ImageFormat::SRGB, kFixedWidth,
|
||||
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
||||
runner->MutableInputs()
|
||||
->Tag("INPUT_FRAMES")
|
||||
->Tag(kInputFramesTag)
|
||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
||||
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||
EXPECT_TRUE(output_packet.empty());
|
||||
}
|
||||
|
||||
|
@ -88,10 +91,10 @@ TEST(VerticalFrameRemovalCalculatorTest, UpperBoundPass) {
|
|||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
|
||||
runner->MutableInputs()
|
||||
->Tag("INPUT_FRAMES")
|
||||
->Tag(kInputFramesTag)
|
||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
||||
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||
EXPECT_EQ(1, output_packet.size());
|
||||
auto& output_frame = output_packet[0].Get<ImageFrame>();
|
||||
EXPECT_EQ(kWidth, output_frame.Width());
|
||||
|
@ -114,10 +117,10 @@ TEST(VideoFilterCalculatorTest, LowerBoundNoPass) {
|
|||
ImageFormat::SRGB, kFixedWidth,
|
||||
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
||||
runner->MutableInputs()
|
||||
->Tag("INPUT_FRAMES")
|
||||
->Tag(kInputFramesTag)
|
||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
||||
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||
EXPECT_TRUE(output_packet.empty());
|
||||
}
|
||||
|
||||
|
@ -137,10 +140,10 @@ TEST(VerticalFrameRemovalCalculatorTest, LowerBoundPass) {
|
|||
auto input_frame =
|
||||
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
|
||||
runner->MutableInputs()
|
||||
->Tag("INPUT_FRAMES")
|
||||
->Tag(kInputFramesTag)
|
||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||
MP_ASSERT_OK(runner->Run());
|
||||
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets;
|
||||
const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
|
||||
EXPECT_EQ(1, output_packet.size());
|
||||
auto& output_frame = output_packet[0].Get<ImageFrame>();
|
||||
EXPECT_EQ(kWidth, output_frame.Width());
|
||||
|
@ -164,7 +167,7 @@ TEST(VerticalFrameRemovalCalculatorTest, OutputError) {
|
|||
ImageFormat::SRGB, kFixedWidth,
|
||||
static_cast<int>(kFixedWidth / kAspectRatio), 16);
|
||||
runner->MutableInputs()
|
||||
->Tag("INPUT_FRAMES")
|
||||
->Tag(kInputFramesTag)
|
||||
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
|
||||
absl::Status status = runner->Run();
|
||||
EXPECT_EQ(status.code(), absl::StatusCode::kUnknown);
|
||||
|
|
|
@@ -1,5 +1,7 @@
 #include "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.h"

+constexpr float kMinVelocity = 0.5;
+
 namespace mediapipe {
 namespace autoflip {
 namespace {

@@ -75,6 +77,7 @@ absl::Status KinematicPathSolver::AddObservation(int position,
     current_position_px_ = position;
   }
   target_position_px_ = position;
+  prior_position_px_ = current_position_px_;
   motion_state_ = false;
   mean_delta_t_ = -1;
   raw_positions_at_time_.push_front(

@@ -106,6 +109,11 @@ absl::Status KinematicPathSolver::AddObservation(int position,
             options_.reframe_window())
         << "Reframe window cannot exceed min_motion_to_reframe.";
   }
+  RET_CHECK(options_.has_max_velocity() ^
+            (options_.has_max_velocity_scale() &&
+             options_.has_max_velocity_shift()))
+      << "Must either set max_velocity or set both max_velocity_scale and "
+         "max_velocity_shift.";
   return absl::OkStatus();
 }
@@ -123,9 +131,29 @@ absl::Status KinematicPathSolver::AddObservation(int position,
   }

   int filtered_position = Median(raw_positions_at_time_);

+  float min_reframe = (options_.has_min_motion_to_reframe()
+                           ? options_.min_motion_to_reframe()
+                           : options_.min_motion_to_reframe_lower()) *
+                      pixels_per_degree_;
+  float max_reframe = (options_.has_min_motion_to_reframe()
+                           ? options_.min_motion_to_reframe()
+                           : options_.min_motion_to_reframe_upper()) *
+                      pixels_per_degree_;
+
+  filtered_position = fmax(min_location_ - min_reframe, filtered_position);
+  filtered_position = fmin(max_location_ + max_reframe, filtered_position);
+
   double delta_degs =
       (filtered_position - current_position_px_) / pixels_per_degree_;

+  double max_velocity =
+      options_.has_max_velocity()
+          ? options_.max_velocity()
+          : fmax(abs(delta_degs * options_.max_velocity_scale()) +
+                     options_.max_velocity_shift(),
+                 kMinVelocity);
+
   // If the motion is smaller than the min_motion_to_reframe and camera is
   // stationary, don't use the update.
   if (IsMotionTooSmall(delta_degs) && !motion_state_) {
@ -169,10 +197,9 @@ absl::Status KinematicPathSolver::AddObservation(int position,
|
|||
options_.max_update_rate());
|
||||
double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
|
||||
observed_velocity * update_rate;
|
||||
// Limited current velocity.
|
||||
current_velocity_deg_per_s_ =
|
||||
updated_velocity > 0 ? fmin(updated_velocity, options_.max_velocity())
|
||||
: fmax(updated_velocity, -options_.max_velocity());
|
||||
current_velocity_deg_per_s_ = updated_velocity > 0
|
||||
? fmin(updated_velocity, max_velocity)
|
||||
: fmax(updated_velocity, -max_velocity);
|
||||
|
||||
// Update prediction based on time input.
|
||||
return UpdatePrediction(time_us);
|
||||
|
@ -182,6 +209,9 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) {
|
|||
RET_CHECK(current_time_ < time_us)
|
||||
<< "Prediction time added before a prior observation or prediction.";
|
||||
|
||||
// Store prior pixel location.
|
||||
prior_position_px_ = current_position_px_;
|
||||
|
||||
// Position update limited by min/max.
|
||||
double update_position_px =
|
||||
current_position_px_ +
|
||||
|
@ -209,7 +239,19 @@ absl::Status KinematicPathSolver::GetState(int* position) {
|
|||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status KinematicPathSolver::SetState(const int position) {
|
||||
absl::Status KinematicPathSolver::GetState(float* position) {
|
||||
RET_CHECK(initialized_) << "GetState called before first observation added.";
|
||||
*position = current_position_px_;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status KinematicPathSolver::GetDeltaState(float* delta_position) {
|
||||
RET_CHECK(initialized_) << "GetState called before first observation added.";
|
||||
*delta_position = current_position_px_ - prior_position_px_;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status KinematicPathSolver::SetState(const float position) {
|
||||
RET_CHECK(initialized_) << "SetState called before first observation added.";
|
||||
current_position_px_ = position;
|
||||
return absl::OkStatus();
|
||||
|
@ -218,7 +260,15 @@ absl::Status KinematicPathSolver::SetState(const int position) {
|
|||
absl::Status KinematicPathSolver::GetTargetPosition(int* target_position) {
|
||||
RET_CHECK(initialized_)
|
||||
<< "GetTargetPosition called before first observation added.";
|
||||
*target_position = round(target_position_px_);
|
||||
|
||||
// Provide target position clamped by min/max locations.
|
||||
if (target_position_px_ < min_location_) {
|
||||
*target_position = min_location_;
|
||||
} else if (target_position_px_ > max_location_) {
|
||||
*target_position = max_location_;
|
||||
} else {
|
||||
*target_position = round(target_position_px_);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -238,6 +288,7 @@ absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
|
|||
double updated_distance = max_location - min_location;
|
||||
double scale_change = updated_distance / prior_distance;
|
||||
current_position_px_ = current_position_px_ * scale_change;
|
||||
prior_position_px_ = prior_position_px_ * scale_change;
|
||||
target_position_px_ = target_position_px_ * scale_change;
|
||||
max_location_ = max_location;
|
||||
min_location_ = min_location;
|
||||
|
|
|
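Editor's note: the branch added above caps camera velocity adaptively when max_velocity is unset; the cap grows with the size of the observed motion (scaled by max_velocity_scale, offset by max_velocity_shift) and never drops below kMinVelocity. A minimal sketch of just that computation follows; the helper name is hypothetical and only mirrors the expression in AddObservation(), it is not part of the calculator:

    #include <cmath>

    // Same floor as the solver above.
    constexpr float kMinVelocity = 0.5f;

    // Velocity cap in degrees/second for one observation. `delta_degs` is the
    // observed motion in degrees; `scale`/`shift` correspond to the
    // max_velocity_scale and max_velocity_shift options.
    double AdaptiveMaxVelocity(double delta_degs, double scale, double shift) {
      return std::fmax(std::abs(delta_degs * scale) + shift, kMinVelocity);
    }

As a check against the tests further down: with scale 0.4 and shift -2.0, a 500 px jump at the tests' ~16.6 px per degree is a ~30° motion, so the cap is 0.4 * 30 - 2 = 10°/s; over one second that is ~166.7 px, which is where the 666.6667 expectation in PassMaxVelocityScale comes from.
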
@@ -46,10 +46,12 @@ class KinematicPathSolver {
  absl::Status AddObservation(int position, const uint64 time_us);
  // Get the predicted position at a time.
  absl::Status UpdatePrediction(const int64 time_us);
  // Get the state at a time.
  // Get the state at a time, as an int.
  absl::Status GetState(int* position);
  // Get the state at a time, as a float.
  absl::Status GetState(float* position);
  // Overwrite the current state value.
  absl::Status SetState(const int position);
  absl::Status SetState(const float position);
  // Update PixelPerDegree value.
  absl::Status UpdatePixelsPerDegree(const float pixels_per_degree);
  // Provide the current target position of the reframe action.

@@ -66,6 +68,8 @@ class KinematicPathSolver {
  // Clear any history buffer of positions that are used when
  // filtering_time_window_us is set to a non-zero value.
  void ClearHistory();
  // Provides the change in position from last state.
  absl::Status GetDeltaState(float* delta_position);

 private:
  // Tuning options.

@@ -77,6 +81,7 @@ class KinematicPathSolver {
  float pixels_per_degree_;
  // Current state values.
  double current_position_px_;
  double prior_position_px_;
  double current_velocity_deg_per_s_;
  uint64 current_time_;
  // History of observations (second) and their time (first).

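Editor's note: the header keeps the int-based accessors and adds float overloads plus GetDeltaState, so callers can track sub-pixel state. A minimal usage sketch, assuming a solver that has already received at least one observation (both getters fail otherwise); the wrapper function is purely illustrative:

    // Sketch only; `solver` is a mediapipe::autoflip::KinematicPathSolver.
    absl::Status ReadSmoothState(mediapipe::autoflip::KinematicPathSolver& solver) {
      float position = 0.0f;
      float delta = 0.0f;
      MP_RETURN_IF_ERROR(solver.GetState(&position));    // sub-pixel camera position
      MP_RETURN_IF_ERROR(solver.GetDeltaState(&delta));  // position - prior position
      // The float SetState overload overwrites the state without rounding.
      return solver.SetState(position);
    }
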
@@ -6,8 +6,9 @@ message KinematicOptions {
  // Weighted update of new camera velocity (measurement) vs current state
  // (prediction).
  optional double update_rate = 1 [default = 0.5, deprecated = true];
  // Max velocity (degrees per second) that the camera can move.
  optional double max_velocity = 2 [default = 18];
  // Max velocity (degrees per second) that the camera can move. Cannot be used
  // with max_velocity_scale or max_velocity_shift.
  optional double max_velocity = 2;
  // Min motion (in degrees) to react for both upper and lower directions. Must
  // not be set if using min_motion_to_reframe_lower and
  // min_motion_to_reframe_upper.

@@ -30,4 +31,12 @@ message KinematicOptions {
  optional int64 filtering_time_window_us = 7 [default = 0];
  // Weighted update of average period, used for motion updates.
  optional float mean_period_update_rate = 8 [default = 0.25];
  // Scale factor for max velocity, to be multiplied by the distance from center
  // in degrees. Cannot be used with max_velocity and must be used with
  // max_velocity_shift.
  optional float max_velocity_scale = 11;
  // Shift factor for max velocity, to be added to the scaled distance from
  // center in degrees. Cannot be used with max_velocity and must be used with
  // max_velocity_scale.
  optional float max_velocity_shift = 12;
}

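Editor's note: the new fields are mutually exclusive with max_velocity, and the RET_CHECK added to AddObservation() enforces exactly one of the two configurations. A short sketch of the valid combinations, using the same generated setters the tests below use (namespace qualifiers omitted):

    // Valid: a fixed cap, the camera never exceeds 6 degrees/second.
    KinematicOptions fixed_cap;
    fixed_cap.set_max_velocity(6);

    // Valid: an adaptive cap; scale and shift must be set together.
    KinematicOptions adaptive_cap;
    adaptive_cap.set_max_velocity_scale(0.5);
    adaptive_cap.set_max_velocity_shift(-1.0);

    // Invalid: mixing max_velocity with either new field makes
    // AddObservation() fail its RET_CHECK.
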
@@ -36,7 +36,7 @@ TEST(KinematicPathSolverTest, FailZeroPixelsPerDegree) {
TEST(KinematicPathSolverTest, FailNotInitializedState) {
  KinematicOptions options;
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  EXPECT_FALSE(solver.GetState(&state).ok());
}

@@ -55,13 +55,13 @@ TEST(KinematicPathSolverTest, PassNotEnoughMotionLargeImg) {
  options.set_max_velocity(1000);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to not move.
  EXPECT_EQ(state, 500);
  EXPECT_FLOAT_EQ(state, 500);
}

TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {

@@ -72,13 +72,13 @@ TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {
  options.set_max_velocity(500);
  // Set degrees / pixel to 8.3
  KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
  // Move target by 10px / 8.3 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to not move.
  EXPECT_EQ(state, 400);
  EXPECT_FLOAT_EQ(state, 400);
}

TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {

@@ -90,7 +90,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
  options.set_filtering_time_window_us(3000000);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));

@@ -98,7 +98,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to not move.
  EXPECT_EQ(state, 500);
  EXPECT_FLOAT_EQ(state, 500);
}

TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {

@@ -110,7 +110,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
  options.set_filtering_time_window_us(0);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));

@@ -118,7 +118,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to not move.
  EXPECT_EQ(state, 506);
  EXPECT_FLOAT_EQ(state, 506.4);
}

TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {

@@ -130,13 +130,13 @@ TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {
  options.set_max_velocity(1000);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to move.
  EXPECT_EQ(state, 520);
  EXPECT_FLOAT_EQ(state, 520);
}

TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {

@@ -148,13 +148,13 @@ TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {
  options.set_max_velocity(18);
  // Set degrees / pixel to 8.3
  KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
  // Move target by 10px / 8.3 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to move.
  EXPECT_EQ(state, 410);
  EXPECT_FLOAT_EQ(state, 410);
}

TEST(KinematicPathSolverTest, FailReframeWindowSetting) {

@@ -181,13 +181,13 @@ TEST(KinematicPathSolverTest, PassReframeWindow) {
  options.set_reframe_window(0.75);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to move 1.2-.75 deg, * 16.6 = 7.47px + 500 =
  EXPECT_EQ(state, 508);
  EXPECT_FLOAT_EQ(state, 507.5);
}

TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {

@@ -202,17 +202,17 @@ TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {
  options.set_reframe_window(0.75);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to not move
  EXPECT_EQ(state, 500);
  EXPECT_FLOAT_EQ(state, 500);
  MP_ASSERT_OK(solver.AddObservation(480, kMicroSecInSec * 2));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to move
  EXPECT_EQ(state, 493);
  EXPECT_FLOAT_EQ(state, 492.5);
}

TEST(KinematicPathSolverTest, PassCheckState) {

@@ -241,12 +241,12 @@ TEST(KinematicPathSolverTest, PassUpdateRate30FPS) {
  options.set_max_update_rate(0.8);
  options.set_max_velocity(18);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 30));
  MP_ASSERT_OK(solver.GetState(&state));
  // (0.033 / .25) * 20 =
  EXPECT_EQ(state, 503);
  EXPECT_FLOAT_EQ(state, 502.6667);
}

TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {

@@ -256,12 +256,12 @@ TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {
  options.set_max_update_rate(0.8);
  options.set_max_velocity(18);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 10));
  MP_ASSERT_OK(solver.GetState(&state));
  // (0.1 / .25) * 20 =
  EXPECT_EQ(state, 508);
  EXPECT_FLOAT_EQ(state, 508);
}

TEST(KinematicPathSolverTest, PassUpdateRate) {

@@ -271,7 +271,8 @@ TEST(KinematicPathSolverTest, PassUpdateRate) {
  options.set_max_update_rate(1.0);
  options.set_max_velocity(18);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state, target_position;
  int target_position;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
  EXPECT_EQ(target_position, 500);

@@ -279,7 +280,7 @@ TEST(KinematicPathSolverTest, PassUpdateRate) {
  MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
  EXPECT_EQ(target_position, 520);
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 505);
  EXPECT_FLOAT_EQ(state, 505);
}

TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {

@@ -289,7 +290,8 @@ TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
  options.set_max_update_rate(1.0);
  options.set_max_velocity(18);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state, target_position;
  int target_position;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
  EXPECT_EQ(target_position, 500);

@@ -299,10 +301,10 @@ TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
  MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
  EXPECT_EQ(target_position, 520 * 0.5);
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 253);
  EXPECT_FLOAT_EQ(state, 252.5);
}

TEST(KinematicPathSolverTest, PassMaxVelocity) {
TEST(KinematicPathSolverTest, PassMaxVelocityInt) {
  KinematicOptions options;
  options.set_min_motion_to_reframe(1.0);
  options.set_update_rate(1.0);

@@ -315,6 +317,33 @@ TEST(KinematicPathSolverTest, PassMaxVelocity) {
  EXPECT_EQ(state, 600);
}

TEST(KinematicPathSolverTest, PassMaxVelocity) {
  KinematicOptions options;
  options.set_min_motion_to_reframe(1.0);
  options.set_update_rate(1.0);
  options.set_max_velocity(6);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.AddObservation(1000, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_FLOAT_EQ(state, 600);
}

TEST(KinematicPathSolverTest, PassMaxVelocityScale) {
  KinematicOptions options;
  options.set_min_motion_to_reframe(1.0);
  options.set_update_rate(1.0);
  options.set_max_velocity_scale(0.4);
  options.set_max_velocity_shift(-2.0);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.AddObservation(1000, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_FLOAT_EQ(state, 666.6667);
}

TEST(KinematicPathSolverTest, PassDegPerPxChange) {
  KinematicOptions options;
  // Set min motion to 2deg

@@ -323,18 +352,18 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) {
  options.set_max_velocity(1000);
  // Set degrees / pixel to 16.6
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
  // Move target by 20px / 16.6 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to not move.
  EXPECT_EQ(state, 500);
  EXPECT_FLOAT_EQ(state, 500);
  MP_ASSERT_OK(solver.UpdatePixelsPerDegree(500.0 / kWidthFieldOfView));
  MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 2));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to move.
  EXPECT_EQ(state, 516);
  EXPECT_FLOAT_EQ(state, 516);
}

TEST(KinematicPathSolverTest, NoTimestampSmoothing) {

@@ -344,14 +373,14 @@ TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
  options.set_max_velocity(6);
  options.set_mean_period_update_rate(1.0);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, 0));
  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 600);
  EXPECT_FLOAT_EQ(state, 600);
  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 720);
  EXPECT_FLOAT_EQ(state, 720);
}

TEST(KinematicPathSolverTest, TimestampSmoothing) {

@@ -361,14 +390,14 @@ TEST(KinematicPathSolverTest, TimestampSmoothing) {
  options.set_max_velocity(6);
  options.set_mean_period_update_rate(0.05);
  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(500, 0));
  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 600);
  EXPECT_FLOAT_EQ(state, 600);
  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 701);
  EXPECT_FLOAT_EQ(state, 701);
}

TEST(KinematicPathSolverTest, PassSetPosition) {

@@ -380,16 +409,30 @@ TEST(KinematicPathSolverTest, PassSetPosition) {
  options.set_max_velocity(18);
  // Set degrees / pixel to 8.3
  KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
  int state;
  float state;
  MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
  // Move target by 10px / 8.3 = 1.2deg
  MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
  MP_ASSERT_OK(solver.GetState(&state));
  // Expect cam to move.
  EXPECT_EQ(state, 410);
  EXPECT_FLOAT_EQ(state, 410);
  MP_ASSERT_OK(solver.SetState(400));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_EQ(state, 400);
  EXPECT_FLOAT_EQ(state, 400);
}
TEST(KinematicPathSolverTest, PassBorderTest) {
  KinematicOptions options;
  options.set_min_motion_to_reframe(1.0);
  options.set_max_update_rate(0.25);
  options.set_max_velocity_scale(0.5);
  options.set_max_velocity_shift(-1.0);

  KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
  float state;
  MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
  MP_ASSERT_OK(solver.AddObservation(800, kMicroSecInSec * 0.1));
  MP_ASSERT_OK(solver.GetState(&state));
  EXPECT_FLOAT_EQ(state, 404.56668);
}

} // namespace

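Editor's note: the switch to EXPECT_FLOAT_EQ removes the rounding that the old int state forced, and the updated expectations follow directly from the solver arithmetic. PassReframeWindow is a quick check of the new 507.5 value: at roughly 16.6 pixels per degree the 20 px observation is about 1.2°, the 0.75° reframe window leaves 0.45° of actionable motion, and 0.45° × 16.6 px/° ≈ 7.5 px, so the camera settles at 500 + 7.5 = 507.5 instead of the previously rounded 508.
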
@@ -148,18 +148,18 @@ class SourceImpl {

  explicit SourceImpl(std::vector<std::unique_ptr<Base>>* vec)
      : SourceImpl(&GetWithAutoGrow(vec, 0)) {}
  explicit SourceImpl(SourceBase* base) : base_(*base) {}
  explicit SourceImpl(SourceBase* base) : base_(base) {}

  template <typename U,
            typename std::enable_if<AllowConnection<U>{}, int>::type = 0>
  Src& AddTarget(const Dst<U>& dest) {
    CHECK(dest.base_.source == nullptr);
    dest.base_.source = &base_;
    base_.dests_.emplace_back(&dest.base_);
    dest.base_.source = base_;
    base_->dests_.emplace_back(&dest.base_);
    return *this;
  }
  Src& SetName(std::string name) {
    base_.name_ = std::move(name);
    base_->name_ = std::move(name);
    return *this;
  }
  template <typename U>

@@ -168,7 +168,8 @@ class SourceImpl {
  }

 private:
  SourceBase& base_;
  // Never null.
  SourceBase* base_;
};

template <bool IsSide, typename T>

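Editor's note: storing SourceBase by pointer rather than by reference is what makes Source objects assignable, which the new CopyableSource test below relies on (`input = b;`). A small standalone illustration of the underlying C++ rule (illustrative only, not MediaPipe code):

    struct Base {};

    struct HoldsRef { Base& base; };  // copy assignment is implicitly deleted
    struct HoldsPtr { Base* base; };  // copyable and assignable

    void Demo(Base& a, Base& b) {
      HoldsPtr x{&a};
      HoldsPtr y{&b};
      x = y;  // OK: the pointer is simply rebound to b

      HoldsRef r{a};
      HoldsRef s{b};
      // r = s;  // error: operator= is deleted because of the reference member
    }
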
@@ -1,5 +1,7 @@
#include "mediapipe/framework/api2/builder.h"

#include <functional>

#include "absl/strings/substitute.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/packet.h"

@@ -46,6 +48,88 @@ TEST(BuilderTest, BuildGraph) {
  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
}

TEST(BuilderTest, CopyableSource) {
  builder::Graph graph;
  builder::Source<false, int> a = graph[Input<int>("A")];
  a.SetName("a");
  builder::Source<false, int> b = graph[Input<int>("B")];
  b.SetName("b");
  builder::SideSource<false, float> side_a = graph[SideInput<float>("SIDE_A")];
  side_a.SetName("side_a");
  builder::SideSource<false, float> side_b = graph[SideInput<float>("SIDE_B")];
  side_b.SetName("side_b");
  builder::Destination<false, int> out = graph[Output<int>("OUT")];
  builder::SideDestination<false, float> side_out =
      graph[SideOutput<float>("SIDE_OUT")];

  builder::Source<false, int> input = a;
  input = b;
  builder::SideSource<false, float> side_input = side_b;
  side_input = side_a;

  input >> out;
  side_input >> side_out;

  CalculatorGraphConfig expected =
      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
        input_stream: "A:a"
        input_stream: "B:b"
        output_stream: "OUT:b"
        input_side_packet: "SIDE_A:side_a"
        input_side_packet: "SIDE_B:side_b"
        output_side_packet: "SIDE_OUT:side_a"
      )pb");
  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
}

TEST(BuilderTest, BuildGraphWithFunctions) {
  builder::Graph graph;

  builder::Source<false, int> base = graph[Input<int>("IN")];
  base.SetName("base");
  builder::SideSource<false, float> side = graph[SideInput<float>("SIDE")];
  side.SetName("side");

  auto foo_fn = [](builder::Source<false, int> base,
                   builder::SideSource<false, float> side,
                   builder::Graph& graph) {
    auto& foo = graph.AddNode("Foo");
    base >> foo[Input<int>("BASE")];
    side >> foo[SideInput<float>("SIDE")];
    return foo[Output<double>("OUT")];
  };
  builder::Source<false, double> foo_out = foo_fn(base, side, graph);

  auto bar_fn = [](builder::Source<false, double> in, builder::Graph& graph) {
    auto& bar = graph.AddNode("Bar");
    in >> bar[Input<double>("IN")];
    return bar[Output<double>("OUT")];
  };
  builder::Source<false, double> bar_out = bar_fn(foo_out, graph);
  bar_out.SetName("out");

  bar_out >> graph[Output<double>("OUT")];

  CalculatorGraphConfig expected =
      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
        input_stream: "IN:base"
        input_side_packet: "SIDE:side"
        output_stream: "OUT:out"
        node {
          calculator: "Foo"
          input_stream: "BASE:base"
          input_side_packet: "SIDE:side"
          output_stream: "OUT:__stream_0"
        }
        node {
          calculator: "Bar"
          input_stream: "IN:__stream_0"
          output_stream: "OUT:out"
        }
      )pb");
  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
}

template <class FooT>
void BuildGraphTypedTest() {
  builder::Graph graph;

@@ -1666,6 +1666,7 @@ TemplateParser::Parser::Parser()
      allow_partial_(false),
      allow_case_insensitive_field_(false),
      allow_unknown_field_(false),
      allow_unknown_extension_(true),
      allow_unknown_enum_(false),
      allow_field_number_(false),
      allow_relaxed_whitespace_(false),

@@ -1683,12 +1684,11 @@ bool TemplateParser::Parser::Parse(io::ZeroCopyInputStream* input,
      allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
                                 : ParserImpl::FORBID_SINGULAR_OVERWRITES;

  bool allow_unknown_extension = true;
  int recursion_limit = std::numeric_limits<int>::max();
  MediaPipeParserImpl parser(
      output->GetDescriptor(), input, error_collector_, finder_,
      parse_info_tree_, overwrites_policy, allow_case_insensitive_field_,
      allow_unknown_field_, allow_unknown_extension, allow_unknown_enum_,
      allow_unknown_field_, allow_unknown_extension_, allow_unknown_enum_,
      allow_field_number_, allow_relaxed_whitespace_, allow_partial_,
      recursion_limit);
  return MergeUsingImpl(input, output, &parser);

@@ -1702,13 +1702,12 @@ bool TemplateParser::Parser::ParseFromString(const std::string& input,

bool TemplateParser::Parser::Merge(io::ZeroCopyInputStream* input,
                                   Message* output) {
  bool allow_unknown_extension = true;
  int recursion_limit = std::numeric_limits<int>::max();
  MediaPipeParserImpl parser(
      output->GetDescriptor(), input, error_collector_, finder_,
      parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
      allow_case_insensitive_field_, allow_unknown_field_,
      allow_unknown_extension, allow_unknown_enum_, allow_field_number_,
      allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
      allow_relaxed_whitespace_, allow_partial_, recursion_limit);
  return MergeUsingImpl(input, output, &parser);
}

@@ -1737,13 +1736,12 @@ bool TemplateParser::Parser::MergeUsingImpl(
bool TemplateParser::Parser::ParseFieldValueFromString(
    const std::string& input, const FieldDescriptor* field, Message* output) {
  io::ArrayInputStream input_stream(input.data(), input.size());
  bool allow_unknown_extension = true;
  int recursion_limit = std::numeric_limits<int>::max();
  ParserImpl parser(
      output->GetDescriptor(), &input_stream, error_collector_, finder_,
      parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
      allow_case_insensitive_field_, allow_unknown_field_,
      allow_unknown_extension, allow_unknown_enum_, allow_field_number_,
      allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
      allow_relaxed_whitespace_, allow_partial_, recursion_limit);
  return parser.ParseField(field, output);
}

@@ -37,6 +37,10 @@ class TemplateParser {
  Parser();
  ~Parser();

  void set_allow_unknown_extension(bool allow_unknown_extension) {
    allow_unknown_extension_ = allow_unknown_extension;
  }

  // Like TextFormat::Parse().
  bool Parse(proto_ns::io::ZeroCopyInputStream* input,
             proto_ns::Message* output);

@@ -99,6 +103,7 @@ class TemplateParser {
  bool allow_partial_;
  bool allow_case_insensitive_field_;
  bool allow_unknown_field_;
  bool allow_unknown_extension_;
  bool allow_unknown_enum_;
  bool allow_field_number_;
  bool allow_relaxed_whitespace_;

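Editor's note: with allow_unknown_extension_ now a member (initialized to true in the constructor, matching the previously hard-coded behaviour), callers can opt out through the new setter. A hedged usage sketch; namespace qualifiers are omitted, `template_text` is a placeholder string, and the message type is arbitrary:

    // Sketch: reject unknown extensions while parsing.
    TemplateParser::Parser parser;
    parser.set_allow_unknown_extension(false);  // the default remains true
    mediapipe::CalculatorGraphConfig config;
    bool ok = parser.ParseFromString(template_text, &config);
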
@@ -34,6 +34,13 @@ typedef int DimensionsPacketType[2];

namespace mediapipe {

constexpr char kLeftRightPaddingTag[] = "LEFT_RIGHT_PADDING";
constexpr char kTopBottomPaddingTag[] = "TOP_BOTTOM_PADDING";
constexpr char kOptionsTag[] = "OPTIONS";
constexpr char kOutputDimensionsTag[] = "OUTPUT_DIMENSIONS";
constexpr char kRotationTag[] = "ROTATION";
constexpr char kImageTag[] = "IMAGE";

using Image = mediapipe::Image;

// Scales, rotates, horizontal or vertical flips the image.

@@ -102,41 +109,41 @@ REGISTER_CALCULATOR(GlScalerCalculator);

// static
absl::Status GlScalerCalculator::GetContract(CalculatorContract* cc) {
  if (cc->Inputs().HasTag("IMAGE")) {
    cc->Inputs().Tag("IMAGE").Set<Image>();
  if (cc->Inputs().HasTag(kImageTag)) {
    cc->Inputs().Tag(kImageTag).Set<Image>();
  } else {
    TagOrIndex(&cc->Inputs(), "VIDEO", 0).Set<GpuBuffer>();
  }
  if (cc->Outputs().HasTag("IMAGE")) {
    cc->Outputs().Tag("IMAGE").Set<Image>();
  if (cc->Outputs().HasTag(kImageTag)) {
    cc->Outputs().Tag(kImageTag).Set<Image>();
  } else {
    TagOrIndex(&cc->Outputs(), "VIDEO", 0).Set<GpuBuffer>();
  }

  if (cc->Inputs().HasTag("ROTATION")) {
    cc->Inputs().Tag("ROTATION").Set<int>();
  if (cc->Inputs().HasTag(kRotationTag)) {
    cc->Inputs().Tag(kRotationTag).Set<int>();
  }
  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
    cc->Inputs().Tag("OUTPUT_DIMENSIONS").Set<DimensionsPacketType>();
  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
    cc->Inputs().Tag(kOutputDimensionsTag).Set<DimensionsPacketType>();
  }
  MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc));

  if (cc->InputSidePackets().HasTag("OPTIONS")) {
    cc->InputSidePackets().Tag("OPTIONS").Set<GlScalerCalculatorOptions>();
  if (cc->InputSidePackets().HasTag(kOptionsTag)) {
    cc->InputSidePackets().Tag(kOptionsTag).Set<GlScalerCalculatorOptions>();
  }
  if (HasTagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)) {
    TagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)
        .Set<DimensionsPacketType>();
  }
  if (cc->InputSidePackets().HasTag("ROTATION")) {
  if (cc->InputSidePackets().HasTag(kRotationTag)) {
    // Counterclockwise rotation.
    cc->InputSidePackets().Tag("ROTATION").Set<int>();
    cc->InputSidePackets().Tag(kRotationTag).Set<int>();
  }

  if (cc->Outputs().HasTag("TOP_BOTTOM_PADDING") &&
      cc->Outputs().HasTag("LEFT_RIGHT_PADDING")) {
    cc->Outputs().Tag("TOP_BOTTOM_PADDING").Set<float>();
    cc->Outputs().Tag("LEFT_RIGHT_PADDING").Set<float>();
  if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
      cc->Outputs().HasTag(kLeftRightPaddingTag)) {
    cc->Outputs().Tag(kTopBottomPaddingTag).Set<float>();
    cc->Outputs().Tag(kLeftRightPaddingTag).Set<float>();
  }
  return absl::OkStatus();
}

@@ -187,8 +194,8 @@ absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
    dst_width_ = dimensions[0];
    dst_height_ = dimensions[1];
  }
  if (cc->InputSidePackets().HasTag("ROTATION")) {
    rotation_ccw = cc->InputSidePackets().Tag("ROTATION").Get<int>();
  if (cc->InputSidePackets().HasTag(kRotationTag)) {
    rotation_ccw = cc->InputSidePackets().Tag(kRotationTag).Get<int>();
  }

  MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));

@@ -197,22 +204,22 @@ absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
}

absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
    if (cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
    if (cc->Inputs().Tag(kOutputDimensionsTag).IsEmpty()) {
      // OUTPUT_DIMENSIONS input stream is specified, but value is missing.
      return absl::OkStatus();
    }

    const auto& dimensions =
        cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<DimensionsPacketType>();
        cc->Inputs().Tag(kOutputDimensionsTag).Get<DimensionsPacketType>();
    dst_width_ = dimensions[0];
    dst_height_ = dimensions[1];
  }

  return helper_.RunInGlContext([this, cc]() -> absl::Status {
    const auto& input =
        cc->Inputs().HasTag("IMAGE")
            ? cc->Inputs().Tag("IMAGE").Get<Image>().GetGpuBuffer()
        cc->Inputs().HasTag(kImageTag)
            ? cc->Inputs().Tag(kImageTag).Get<Image>().GetGpuBuffer()
            : TagOrIndex(cc->Inputs(), "VIDEO", 0).Get<GpuBuffer>();
    QuadRenderer* renderer = nullptr;
    GlTexture src1;

@@ -254,8 +261,8 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
    RET_CHECK(renderer) << "Unsupported input texture type";

    // Override input side packet if ROTATION input packet is provided.
    if (cc->Inputs().HasTag("ROTATION")) {
      int rotation_ccw = cc->Inputs().Tag("ROTATION").Get<int>();
    if (cc->Inputs().HasTag(kRotationTag)) {
      int rotation_ccw = cc->Inputs().Tag(kRotationTag).Get<int>();
      MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
    }

@@ -263,18 +270,18 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
    int dst_height;
    GetOutputDimensions(src1.width(), src1.height(), &dst_width, &dst_height);

    if (cc->Outputs().HasTag("TOP_BOTTOM_PADDING") &&
        cc->Outputs().HasTag("LEFT_RIGHT_PADDING")) {
    if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
        cc->Outputs().HasTag(kLeftRightPaddingTag)) {
      float top_bottom_padding;
      float left_right_padding;
      GetOutputPadding(src1.width(), src1.height(), dst_width, dst_height,
                       &top_bottom_padding, &left_right_padding);
      cc->Outputs()
          .Tag("TOP_BOTTOM_PADDING")
          .Tag(kTopBottomPaddingTag)
          .AddPacket(
              MakePacket<float>(top_bottom_padding).At(cc->InputTimestamp()));
      cc->Outputs()
          .Tag("LEFT_RIGHT_PADDING")
          .Tag(kLeftRightPaddingTag)
          .AddPacket(
              MakePacket<float>(left_right_padding).At(cc->InputTimestamp()));
    }

@@ -304,9 +311,9 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {

    glFlush();

    if (cc->Outputs().HasTag("IMAGE")) {
    if (cc->Outputs().HasTag(kImageTag)) {
      auto output = dst.GetFrame<Image>();
      cc->Outputs().Tag("IMAGE").Add(output.release(), cc->InputTimestamp());
      cc->Outputs().Tag(kImageTag).Add(output.release(), cc->InputTimestamp());
    } else {
      auto output = dst.GetFrame<GpuBuffer>();
      TagOrIndex(&cc->Outputs(), "VIDEO", 0)

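Editor's note: the mechanical change in this file is replacing repeated tag string literals with file-scope constexpr constants, so each tag string is spelled exactly once and a misspelled identifier fails to compile instead of silently referring to a non-existent stream. The pattern, reduced to its core (illustrative fragment only, not the full calculator):

    constexpr char kRotationTag[] = "ROTATION";  // tag spelled out exactly once

    absl::Status ContractSketch(mediapipe::CalculatorContract* cc) {
      // Every use refers to the constant, never to the raw string.
      if (cc->Inputs().HasTag(kRotationTag)) {
        cc->Inputs().Tag(kRotationTag).Set<int>();
      }
      return absl::OkStatus();
    }
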
@@ -24,6 +24,7 @@ package(default_visibility = ["//visibility:public"])
cc_library(
    name = "pose_tracking_gpu_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_gpu",
        "//mediapipe/modules/pose_landmark:pose_landmark_gpu",

@@ -40,6 +41,7 @@ mediapipe_binary_graph(
cc_library(
    name = "pose_tracking_cpu_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
        "//mediapipe/modules/pose_landmark:pose_landmark_cpu",

@@ -8,6 +8,17 @@ output_stream: "output_video"
# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"

# Generates side packet to enable segmentation.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { bool_value: true }
    }
  }
}

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it

@@ -32,8 +43,10 @@ node {
# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkCpu"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}

@@ -43,7 +56,8 @@ node {
  calculator: "PoseRendererCpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "ROI:roi_from_landmarks"
  input_stream: "SEGMENTATION_MASK:segmentation_mask"
  input_stream: "DETECTION:pose_detection"
  input_stream: "ROI:roi_from_landmarks"
  output_stream: "IMAGE:output_video"
}

@@ -8,6 +8,17 @@ output_stream: "output_video"
# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"

# Generates side packet to enable segmentation.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { bool_value: true }
    }
  }
}

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it

@@ -32,8 +43,10 @@ node {
# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkGpu"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}

@@ -43,7 +56,8 @@ node {
  calculator: "PoseRendererGpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "ROI:roi_from_landmarks"
  input_stream: "SEGMENTATION_MASK:segmentation_mask"
  input_stream: "DETECTION:pose_detection"
  input_stream: "ROI:roi_from_landmarks"
  output_stream: "IMAGE:output_video"
}

@@ -27,6 +27,7 @@ mediapipe_simple_subgraph(
    register_as = "PoseRendererGpu",
    deps = [
        "//mediapipe/calculators/core:split_landmarks_calculator",
        "//mediapipe/calculators/image:recolor_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",

@@ -41,6 +42,7 @@ mediapipe_simple_subgraph(
    register_as = "PoseRendererCpu",
    deps = [
        "//mediapipe/calculators/core:split_landmarks_calculator",
        "//mediapipe/calculators/image:recolor_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",