Project import generated by Copybara.

GitOrigin-RevId: 1610e588e497817fae2d9a458093ab6a370e2972
Commit 710fb3de58 (parent b899d17f18)
Authored by MediaPipe Team on 2021-08-18 15:18:12 -07:00; committed by jqtang
158 changed files with 10104 additions and 1568 deletions

View File

@ -331,7 +331,9 @@ load("@rules_jvm_external//:defs.bzl", "maven_install")
maven_install( maven_install(
artifacts = [ artifacts = [
"androidx.concurrent:concurrent-futures:1.0.0-alpha03", "androidx.concurrent:concurrent-futures:1.0.0-alpha03",
"androidx.lifecycle:lifecycle-common:2.2.0", "androidx.lifecycle:lifecycle-common:2.3.1",
"androidx.activity:activity:1.2.2",
"androidx.fragment:fragment:1.3.4",
"androidx.annotation:annotation:aar:1.1.0", "androidx.annotation:annotation:aar:1.1.0",
"androidx.appcompat:appcompat:aar:1.1.0-rc01", "androidx.appcompat:appcompat:aar:1.1.0-rc01",
"androidx.camera:camera-core:1.0.0-beta10", "androidx.camera:camera-core:1.0.0-beta10",
@ -376,9 +378,9 @@ http_archive(
) )
# Tensorflow repo should always go after the other external dependencies. # Tensorflow repo should always go after the other external dependencies.
# 2021-06-07 # 2021-07-29
_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec" _TENSORFLOW_GIT_COMMIT = "52a2905cbc21034766c08041933053178c5d10e3"
_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126" _TENSORFLOW_SHA256 = "06d4691bcdb700f3275fa0971a1585221c2b9f3dffe867963be565a6643d7f56"
http_archive( http_archive(
name = "org_tensorflow", name = "org_tensorflow",
urls = [ urls = [
@ -399,3 +401,18 @@ load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
tf_workspace3() tf_workspace3()
load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2") load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
tf_workspace2() tf_workspace2()
# Edge TPU
http_archive(
name = "libedgetpu",
sha256 = "14d5527a943a25bc648c28a9961f954f70ba4d79c0a9ca5ae226e1831d72fe80",
strip_prefix = "libedgetpu-3164995622300286ef2bb14d7fdc2792dae045b7",
urls = [
"https://github.com/google-coral/libedgetpu/archive/3164995622300286ef2bb14d7fdc2792dae045b7.tar.gz"
],
)
load("@libedgetpu//:workspace.bzl", "libedgetpu_dependencies")
libedgetpu_dependencies()
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
cc_crosstool(name = "crosstool")

View File

@ -16,12 +16,14 @@ nav_order: 1
Please follow instructions below to build Android example apps in the supported Please follow instructions below to build Android example apps in the supported
MediaPipe [solutions](../solutions/solutions.md). To learn more about these MediaPipe [solutions](../solutions/solutions.md). To learn more about these
example apps, start from [Hello World! on Android](./hello_world_android.md). To example apps, start from [Hello World! on Android](./hello_world_android.md).
incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
## Building Android example apps To incorporate MediaPipe into Android Studio projects, see these
[instructions](./android_solutions.md) to use the MediaPipe Android Solution
APIs (currently in alpha) that are now available in
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
## Building Android example apps with Bazel
### Prerequisite ### Prerequisite
@ -51,16 +53,6 @@ $YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or
android_sdk_repository() in the android_sdk_repository() in the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file. [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file.
Please verify all the necessary packages are installed.
* Android SDK Platform API Level 28 or 29
* Android SDK Build-Tools 28 or 29
* Android SDK Platform-Tools 28 or 29
* Android SDK Tools 26.1.1
* Android NDK 19c or above
### Option 1: Build with Bazel in Command Line
Tip: You can run this Tip: You can run this
[script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh) [script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh)
to build (and install) all MediaPipe Android example apps. to build (and install) all MediaPipe Android example apps.
@ -84,108 +76,3 @@ to build (and install) all MediaPipe Android example apps.
```bash ```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
``` ```
### Option 2: Build with Bazel in Android Studio
The MediaPipe project can be imported into Android Studio using the Bazel
plugins. This allows the MediaPipe examples to be built and modified in Android
Studio.
To incorporate MediaPipe into an existing Android Studio project, see these
[instructions](./android_archive_library.md) that use Android Archive (AAR) and
Gradle.
The steps below use Android Studio 3.5 to build and install a MediaPipe example
app:
1. Install and launch Android Studio 3.5.
2. Select `Configure` -> `SDK Manager` -> `SDK Platforms`.
* Verify that Android SDK Platform API Level 28 or 29 is installed.
* Take note of the Android SDK Location, e.g.,
`/usr/local/home/Android/Sdk`.
3. Select `Configure` -> `SDK Manager` -> `SDK Tools`.
* Verify that Android SDK Build-Tools 28 or 29 is installed.
* Verify that Android SDK Platform-Tools 28 or 29 is installed.
* Verify that Android SDK Tools 26.1.1 is installed.
* Verify that Android NDK 19c or above is installed.
* Take note of the Android NDK Location, e.g.,
`/usr/local/home/Android/Sdk/ndk-bundle` or
`/usr/local/home/Android/Sdk/ndk/20.0.5594570`.
4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point
to the installed SDK and NDK.
```bash
export ANDROID_HOME=/usr/local/home/Android/Sdk
# If the NDK libraries are installed by a previous version of Android Studio, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle
# If the NDK libraries are installed by Android Studio 3.5, do
export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/<version number>
```
5. Select `Configure` -> `Plugins` to install `Bazel`.
6. On Linux, select `File` -> `Settings` -> `Bazel settings`. On macOS, select
`Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel
binary location` to be the same as the output of `$ which bazel`.
7. Select `Import Bazel Project`.
* Select `Workspace`: `/path/to/mediapipe` and select `Next`.
* Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select
`Next`.
* Modify `Project View` to be the following and select `Finish`.
```
directories:
# read project settings, e.g., .bazelrc
.
-mediapipe/objc
-mediapipe/examples/ios
targets:
//mediapipe/examples/android/...:all
//mediapipe/java/...:all
android_sdk_platform: android-29
sync_flags:
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain
```
8. Select `Bazel` -> `Sync` -> `Sync project with Build files`.
Note: Even after doing step 4, if you still see the error: `"no such package
'@androidsdk//': Either the path attribute of android_sdk_repository or the
ANDROID_HOME environment variable must be set."`, please modify the
[`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE)
file to point to your SDK and NDK library locations, as below:
```
android_sdk_repository(
name = "androidsdk",
path = "/path/to/android/sdk"
)
android_ndk_repository(
name = "androidndk",
path = "/path/to/android/ndk"
)
```
9. Connect an Android device to the workstation.
10. Select `Run...` -> `Edit Configurations...`.
* Select `Templates` -> `Bazel Command`.
* Enter Target Expression:
`//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu`
* Enter Bazel command: `mobile-install`.
* Enter Bazel flags: `-c opt --config=android_arm64`.
* Press the `[+]` button to add the new configuration.
* Select `Run` to run the example app on the connected Android device.

View File

@ -3,7 +3,7 @@ layout: default
title: MediaPipe Android Archive title: MediaPipe Android Archive
parent: MediaPipe on Android parent: MediaPipe on Android
grand_parent: Getting Started grand_parent: Getting Started
nav_order: 2 nav_order: 3
--- ---
# MediaPipe Android Archive # MediaPipe Android Archive

View File

@ -0,0 +1,79 @@
---
layout: default
title: Android Solutions
parent: MediaPipe on Android
grand_parent: Getting Started
nav_order: 2
---
# Android Solution APIs
{: .no_toc }
1. TOC
{:toc}
---
Please follow instructions below to use the MediaPipe Solution APIs in Android
Studio projects and build the Android example apps in the supported MediaPipe
[solutions](../solutions/solutions.md).
## Integrate MediaPipe Android Solutions in Android Studio
MediaPipe Android Solution APIs (currently in alpha) are now available in
[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe).
To incorporate MediaPipe Android Solutions into an Android Studio project, add
the following into the project's Gradle dependencies:
```
dependencies {
// MediaPipe solution-core is the foundation of any MediaPipe solutions.
implementation 'com.google.mediapipe:solution-core:latest.release'
// Optional: MediaPipe Hands solution.
implementation 'com.google.mediapipe:hands:latest.release'
// Optional: MediaPipe FaceMesh solution.
implementation 'com.google.mediapipe:facemesh:latest.release'
// MediaPipe deps
implementation 'com.google.flogger:flogger:latest.release'
implementation 'com.google.flogger:flogger-system-backend:latest.release'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library
def camerax_version = "1.0.0-beta10"
implementation "androidx.camera:camera-core:$camerax_version"
implementation "androidx.camera:camera-camera2:$camerax_version"
implementation "androidx.camera:camera-lifecycle:$camerax_version"
}
```
See the detailed solutions API usage examples for different use cases in the
solution example apps'
[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions).
If the prebuilt Maven packages are not sufficient, build the MediaPipe
Android archive library locally by following these
[instructions](./android_archive_library.md).
## Build solution example apps in Android Studio
1. Open Android Studio Arctic Fox on Linux, macOS, or Windows.
2. Import the `mediapipe/examples/android/solutions` directory into Android Studio.
![Screenshot](../images/import_mp_android_studio_project.png)
3. For Windows users, run `create_win_symlinks.bat` as administrator to create
res directory symlinks.
![Screenshot](../images/run_create_win_symlinks.png)
4. Select "File" -> "Sync Project with Gradle Files" to sync project.
5. Run a solution example app in Android Studio.
![Screenshot](../images/run_android_solution_app.png)
6. (Optional) Run solutions on CPU.
MediaPipe solution example apps run the pipeline and the model inference on
GPU by default. If needed, for example to run the apps on the Android Emulator,
set the `RUN_ON_GPU` boolean variable to `false` in the app's
MainActivity.java to run the pipeline and the model inference on CPU instead.

View File

@ -43,104 +43,189 @@ install --user six`.
3. Install OpenCV and FFmpeg. 3. Install OpenCV and FFmpeg.
Option 1. Use package manager tool to install the pre-compiled OpenCV **Option 1**. Use package manager tool to install the pre-compiled OpenCV
libraries. FFmpeg will be installed via libopencv-video-dev. libraries. FFmpeg will be installed via `libopencv-video-dev`.
Note: Debian 9 and Ubuntu 16.04 provide OpenCV 2.4.9. You may want to take OS | OpenCV
option 2 or 3 to install OpenCV 3 or above. -------------------- | ------
Debian 9 (stretch) | 2.4
Debian 10 (buster) | 3.2
Debian 11 (bullseye) | 4.5
Ubuntu 16.04 LTS | 2.4
Ubuntu 18.04 LTS | 3.2
Ubuntu 20.04 LTS | 4.2
Ubuntu 21.04 | 4.5
```bash ```bash
$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \ $ sudo apt-get install -y \
libopencv-calib3d-dev libopencv-features2d-dev \ libopencv-core-dev \
libopencv-imgproc-dev libopencv-video-dev libopencv-highgui-dev \
libopencv-calib3d-dev \
libopencv-features2d-dev \
libopencv-imgproc-dev \
libopencv-video-dev
``` ```
Debian 9 and Ubuntu 18.04 install the packages in MediaPipe's [`opencv_linux.BUILD`] and [`WORKSPACE`] are already configured
`/usr/lib/x86_64-linux-gnu`. MediaPipe's [`opencv_linux.BUILD`] and for OpenCV 2/3 and should work correctly on any architecture:
[`ffmpeg_linux.BUILD`] are configured for this library path. Ubuntu 20.04
may install the OpenCV and FFmpeg packages in `/usr/local`, Please follow
the option 3 below to modify the [`WORKSPACE`], [`opencv_linux.BUILD`] and
[`ffmpeg_linux.BUILD`] files accordingly.
Moreover, for Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, the
library path needs to be modified like the following:
```bash ```bash
sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD # WORKSPACE
new_local_repository(
name = "linux_opencv",
build_file = "@//third_party:opencv_linux.BUILD",
path = "/usr",
)
# opencv_linux.BUILD for OpenCV 2/3 installed from Debian package
cc_library(
name = "opencv",
linkopts = [
"-l:libopencv_core.so",
"-l:libopencv_calib3d.so",
"-l:libopencv_features2d.so",
"-l:libopencv_highgui.so",
"-l:libopencv_imgcodecs.so",
"-l:libopencv_imgproc.so",
"-l:libopencv_video.so",
"-l:libopencv_videoio.so",
],
)
``` ```
Option 2. Run [`setup_opencv.sh`] to automatically build OpenCV from source For OpenCV 4 you need to modify [`opencv_linux.BUILD`] taking into account
and modify MediaPipe's OpenCV config. current architecture:
Option 3. Follow OpenCV's ```bash
# WORKSPACE
new_local_repository(
name = "linux_opencv",
build_file = "@//third_party:opencv_linux.BUILD",
path = "/usr",
)
# opencv_linux.BUILD for OpenCV 4 installed from Debian package
cc_library(
name = "opencv",
hdrs = glob([
# Uncomment according to your multiarch value (gcc -print-multiarch):
# "include/aarch64-linux-gnu/opencv4/opencv2/cvconfig.h",
# "include/arm-linux-gnueabihf/opencv4/opencv2/cvconfig.h",
# "include/x86_64-linux-gnu/opencv4/opencv2/cvconfig.h",
"include/opencv4/opencv2/**/*.h*",
]),
includes = [
# Uncomment according to your multiarch value (gcc -print-multiarch):
# "include/aarch64-linux-gnu/opencv4/",
# "include/arm-linux-gnueabihf/opencv4/",
# "include/x86_64-linux-gnu/opencv4/",
"include/opencv4/",
],
linkopts = [
"-l:libopencv_core.so",
"-l:libopencv_calib3d.so",
"-l:libopencv_features2d.so",
"-l:libopencv_highgui.so",
"-l:libopencv_imgcodecs.so",
"-l:libopencv_imgproc.so",
"-l:libopencv_video.so",
"-l:libopencv_videoio.so",
],
)
```
**Option 2**. Run [`setup_opencv.sh`] to automatically build OpenCV from
source and modify MediaPipe's OpenCV config. This option will do all steps
defined in Option 3 automatically.
**Option 3**. Follow OpenCV's
[documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html)
to manually build OpenCV from source code. to manually build OpenCV from source code.
Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point
[`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg MediaPipe to your own OpenCV libraries. Assume OpenCV would be installed to
libraries. For example if OpenCV and FFmpeg are both manually installed in `/usr/local/` which is recommended by default.
"/usr/local/", you will need to update: (1) the "linux_opencv" and
"linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv" OpenCV 2/3 setup:
cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg"
cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below:
```bash ```bash
# WORKSPACE
new_local_repository( new_local_repository(
name = "linux_opencv", name = "linux_opencv",
build_file = "@//third_party:opencv_linux.BUILD", build_file = "@//third_party:opencv_linux.BUILD",
path = "/usr/local", path = "/usr/local",
) )
# opencv_linux.BUILD for OpenCV 2/3 installed to /usr/local
cc_library(
name = "opencv",
linkopts = [
"-L/usr/local/lib",
"-l:libopencv_core.so",
"-l:libopencv_calib3d.so",
"-l:libopencv_features2d.so",
"-l:libopencv_highgui.so",
"-l:libopencv_imgcodecs.so",
"-l:libopencv_imgproc.so",
"-l:libopencv_video.so",
"-l:libopencv_videoio.so",
],
)
```
OpenCV 4 setup:
```bash
# WORKSPACE
new_local_repository( new_local_repository(
name = "linux_ffmpeg", name = "linux_opencv",
build_file = "@//third_party:ffmpeg_linux.BUILD", build_file = "@//third_party:opencv_linux.BUILD",
path = "/usr/local", path = "/usr/local",
) )
# opencv_linux.BUILD for OpenCV 4 installed to /usr/local
cc_library( cc_library(
name = "opencv", name = "opencv",
srcs = glob( hdrs = glob([
[ "include/opencv4/opencv2/**/*.h*",
"lib/libopencv_core.so", ]),
"lib/libopencv_highgui.so", includes = [
"lib/libopencv_imgcodecs.so", "include/opencv4/",
"lib/libopencv_imgproc.so", ],
"lib/libopencv_video.so", linkopts = [
"lib/libopencv_videoio.so", "-L/usr/local/lib",
], "-l:libopencv_core.so",
), "-l:libopencv_calib3d.so",
hdrs = glob([ "-l:libopencv_features2d.so",
# For OpenCV 3.x "-l:libopencv_highgui.so",
"include/opencv2/**/*.h*", "-l:libopencv_imgcodecs.so",
# For OpenCV 4.x "-l:libopencv_imgproc.so",
# "include/opencv4/opencv2/**/*.h*", "-l:libopencv_video.so",
]), "-l:libopencv_videoio.so",
includes = [ ],
# For OpenCV 3.x )
"include/", ```
# For OpenCV 4.x
# "include/opencv4/", Current FFmpeg setup is defined in [`ffmpeg_linux.BUILD`] and should work
], for any architecture:
linkstatic = 1,
visibility = ["//visibility:public"], ```bash
# WORKSPACE
new_local_repository(
name = "linux_ffmpeg",
build_file = "@//third_party:ffmpeg_linux.BUILD",
path = "/usr"
) )
# ffmpeg_linux.BUILD for FFmpeg installed from Debian package
cc_library( cc_library(
name = "libffmpeg", name = "libffmpeg",
srcs = glob( linkopts = [
[ "-l:libavcodec.so",
"lib/libav*.so", "-l:libavformat.so",
], "-l:libavutil.so",
), ],
hdrs = glob(["include/libav*/*.h"]),
includes = ["include"],
linkopts = [
"-lavcodec",
"-lavformat",
"-lavutil",
],
linkstatic = 1,
visibility = ["//visibility:public"],
) )
``` ```

View File

@ -29,6 +29,16 @@ Solution | NPM Package | Example
Click on a solution link above for more information, including API and code Click on a solution link above for more information, including API and code
snippets. snippets.
### Supported platforms:
| Browser | Platform                | Notes                                               |
| ------- | ----------------------- | --------------------------------------------------- |
| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia unsupported. |
| Chrome  | iOS                     | Camera unavailable in Chrome on iOS.                |
| Safari  | iPad / iPhone / Mac     | iOS and Safari on iPad / iPhone / MacBook.          |
The quickest way to get acclimated is to look at the examples above. Each demo The quickest way to get acclimated is to look at the examples above. Each demo
has a link to a [CodePen][codepen] so that you can edit the code and try it has a link to a [CodePen][codepen] so that you can edit the code and try it
yourself. We have included a number of utility packages to help you get started: yourself. We have included a number of utility packages to help you get started:

Binary image files changed (previews not shown); the added images are 128 KiB, 258 KiB, and 51 KiB.

View File

@ -278,6 +278,7 @@ Supported configuration options:
import cv2 import cv2
import mediapipe as mp import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh mp_face_mesh = mp.solutions.face_mesh
# For static images: # For static images:
@ -301,9 +302,17 @@ with mp_face_mesh.FaceMesh(
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image=annotated_image, image=annotated_image,
landmark_list=face_landmarks, landmark_list=face_landmarks,
connections=mp_face_mesh.FACE_CONNECTIONS, connections=mp_face_mesh.FACEMESH_TESSELATION,
landmark_drawing_spec=drawing_spec, landmark_drawing_spec=None,
connection_drawing_spec=drawing_spec) connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_tesselation_style())
mp_drawing.draw_landmarks(
image=annotated_image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input: # For webcam input:
@ -335,9 +344,17 @@ with mp_face_mesh.FaceMesh(
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image=image, image=image,
landmark_list=face_landmarks, landmark_list=face_landmarks,
connections=mp_face_mesh.FACE_CONNECTIONS, connections=mp_face_mesh.FACEMESH_TESSELATION,
landmark_drawing_spec=drawing_spec, landmark_drawing_spec=None,
connection_drawing_spec=drawing_spec) connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_tesselation_style())
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
cv2.imshow('MediaPipe FaceMesh', image) cv2.imshow('MediaPipe FaceMesh', image)
if cv2.waitKey(5) & 0xFF == 27: if cv2.waitKey(5) & 0xFF == 27:
break break
@ -423,6 +440,200 @@ camera.start();
</script> </script>
``` ```
### Android Solution API
Please first follow general
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-in-android-studio)
to add MediaPipe Gradle dependencies, then try the FaceMesh solution API in the
companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh)
following
[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
and learn more in the usage example below.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [maxNumFaces](#max_num_faces)
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
#### Camera Input
```java
// For camera input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STREAMING_MODE) // API soon to become
.setMaxNumFaces(1) // setStaticImageMode(false)
.setRunOnGpu(true).build();
FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
facemesh.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> facemesh.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
facemesh.setResultListener(
faceMeshResult -> {
NormalizedLandmark noseLandmark =
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceMeshResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
() ->
cameraInput.start(
this,
facemesh.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
```
#### Image Input
```java
// For reading images from gallery and drawing the output in an ImageView.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STATIC_IMAGE_MODE) // API soon to become
.setMaxNumFaces(1) // setStaticImageMode(true)
.setRunOnGpu(true).build();
FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
// Connects MediaPipe FaceMesh to the user-defined ImageView instance that allows
// users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
// as an example.
FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
facemesh.setResultListener(
faceMeshResult -> {
int width = faceMeshResult.inputBitmap().getWidth();
int height = faceMeshResult.inputBitmap().getHeight();
NormalizedLandmark noseLandmark =
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
"MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
noseLandmark.getX() * width, noseLandmark.getY() * height));
// Request canvas drawing.
imageView.setFaceMeshResult(faceMeshResult);
runOnUiThread(() -> imageView.update());
});
facemesh.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
facemesh.send(bitmap);
}
}
});
Intent gallery = new Intent(
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
```
#### Video Input
```java
// For video input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STREAMING_MODE) // API soon to become
.setMaxNumFaces(1) // setStaticImageMode(false)
.setRunOnGpu(true).build();
FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
facemesh.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> facemesh.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
facemesh.setResultListener(
faceMeshResult -> {
NormalizedLandmark noseLandmark =
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceMeshResult);
glSurfaceView.requestRender();
});
ActivityResultLauncher<Intent> videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
facemesh.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
```
## Example Apps ## Example Apps
Please first see general instructions for Please first see general instructions for

View File

@ -219,8 +219,8 @@ Supported configuration options:
import cv2 import cv2
import mediapipe as mp import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands mp_hands = mp.solutions.hands
drawing_styles = mp.solutions.drawing_styles
# For static images: # For static images:
IMAGE_FILES = [] IMAGE_FILES = []
@ -249,9 +249,11 @@ with mp_hands.Hands(
f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})' f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
) )
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS, annotated_image,
drawing_styles.get_default_hand_landmark_style(), hand_landmarks,
drawing_styles.get_default_hand_connection_style()) mp_hands.HAND_CONNECTIONS,
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())
cv2.imwrite( cv2.imwrite(
'/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1)) '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
@ -281,9 +283,11 @@ with mp_hands.Hands(
if results.multi_hand_landmarks: if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks: for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image, hand_landmarks, mp_hands.HAND_CONNECTIONS, image,
drawing_styles.get_default_hand_landmark_style(), hand_landmarks,
drawing_styles.get_default_hand_connection_style()) mp_hands.HAND_CONNECTIONS,
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())
cv2.imshow('MediaPipe Hands', image) cv2.imshow('MediaPipe Hands', image)
if cv2.waitKey(5) & 0xFF == 27: if cv2.waitKey(5) & 0xFF == 27:
break break
@ -364,6 +368,200 @@ camera.start();
</script> </script>
``` ```
### Android Solution API
Please first follow general
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-in-android-studio)
to add MediaPipe Gradle dependencies, then try the Hands solution API in the
companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands)
following
[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
and learn more in the usage example below.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [maxNumHands](#max_num_hands)
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
#### Camera Input
```java
// For camera input and result rendering with OpenGL.
HandsOptions handsOptions =
HandsOptions.builder()
.setMode(HandsOptions.STREAMING_MODE) // API soon to become
.setMaxNumHands(1) // setStaticImageMode(false)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> hands.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<HandsResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
hands.setResultListener(
handsResult -> {
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
handsResult, 0, HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
wristLandmark.getX(), wristLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(handsResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
() ->
cameraInput.start(
this,
hands.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
```
#### Image Input
```java
// For reading images from gallery and drawing the output in an ImageView.
HandsOptions handsOptions =
HandsOptions.builder()
.setMode(HandsOptions.STATIC_IMAGE_MODE) // API soon to become
.setMaxNumHands(1) // setStaticImageMode(true)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
// Connects MediaPipe Hands to the user-defined ImageView instance that allows
// users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java
// as an example.
HandsResultImageView imageView = new HandsResultImageView(this);
hands.setResultListener(
handsResult -> {
int width = handsResult.inputBitmap().getWidth();
int height = handsResult.inputBitmap().getHeight();
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
handsResult, 0, HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
wristLandmark.getX() * width, wristLandmark.getY() * height));
// Request canvas drawing.
imageView.setHandsResult(handsResult);
runOnUiThread(() -> imageView.update());
});
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
hands.send(bitmap);
}
}
});
Intent gallery = new Intent(
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
```
#### Video Input
```java
// For video input and result rendering with OpenGL.
HandsOptions handsOptions =
HandsOptions.builder()
.setMode(HandsOptions.STREAMING_MODE) // API soon to become
.setMaxNumHands(1) // setStaticImageMode(false)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> hands.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<HandsResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<HandsResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
hands.setResultListener(
handsResult -> {
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
handsResult, 0, HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
wristLandmark.getX(), wristLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(handsResult);
glSurfaceView.requestRender();
});
ActivityResultLauncher<Intent> videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
hands.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
```
## Example Apps ## Example Apps
Please first see general instructions for Please first see general instructions for

View File

@ -225,6 +225,7 @@ Supported configuration options:
import cv2 import cv2
import mediapipe as mp import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic mp_holistic = mp.solutions.holistic
# For static images: # For static images:
@ -247,13 +248,18 @@ with mp_holistic.Holistic(
# Draw pose, left and right hands, and face landmarks on the image. # Draw pose, left and right hands, and face landmarks on the image.
annotated_image = image.copy() annotated_image = image.copy()
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) annotated_image,
results.face_landmarks,
mp_holistic.FACEMESH_TESSELATION,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_tesselation_style())
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) annotated_image,
mp_drawing.draw_landmarks( results.pose_landmarks,
annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) mp_holistic.POSE_CONNECTIONS,
mp_drawing.draw_landmarks( landmark_drawing_spec=mp_drawing_styles.
annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) get_default_pose_landmarks_style())
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# Plot pose world landmarks. # Plot pose world landmarks.
mp_drawing.plot_landmarks( mp_drawing.plot_landmarks(
@ -283,13 +289,18 @@ with mp_holistic.Holistic(
image.flags.writeable = True image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) image,
results.face_landmarks,
mp_holistic.FACEMESH_CONTOURS,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) image,
mp_drawing.draw_landmarks( results.pose_landmarks,
image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) mp_holistic.POSE_CONNECTIONS,
mp_drawing.draw_landmarks( landmark_drawing_spec=mp_drawing_styles
image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) .get_default_pose_landmarks_style())
cv2.imshow('MediaPipe Holistic', image) cv2.imshow('MediaPipe Holistic', image)
if cv2.waitKey(5) & 0xFF == 27: if cv2.waitKey(5) & 0xFF == 27:
break break

View File

@ -30,7 +30,8 @@ overlay of digital content and information on top of the physical world in
augmented reality. augmented reality.
MediaPipe Pose is a ML solution for high-fidelity body pose tracking, inferring MediaPipe Pose is a ML solution for high-fidelity body pose tracking, inferring
33 3D landmarks on the whole body from RGB video frames utilizing our 33 3D landmarks and background segmentation mask on the whole body from RGB
video frames utilizing our
[BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html) [BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
research that also powers the research that also powers the
[ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection). [ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection).
@ -49,11 +50,11 @@ The solution utilizes a two-step detector-tracker ML pipeline, proven to be
effective in our [MediaPipe Hands](./hands.md) and effective in our [MediaPipe Hands](./hands.md) and
[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline [MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline
first locates the person/pose region-of-interest (ROI) within the frame. The first locates the person/pose region-of-interest (ROI) within the frame. The
tracker subsequently predicts the pose landmarks within the ROI using the tracker subsequently predicts the pose landmarks and segmentation mask within
ROI-cropped frame as input. Note that for video use cases the detector is the ROI using the ROI-cropped frame as input. Note that for video use cases the
invoked only as needed, i.e., for the very first frame and when the tracker detector is invoked only as needed, i.e., for the very first frame and when the
could no longer identify body pose presence in the previous frame. For other tracker could no longer identify body pose presence in the previous frame. For
frames the pipeline simply derives the ROI from the previous frames pose other frames the pipeline simply derives the ROI from the previous frames pose
landmarks. landmarks.
The pipeline is implemented as a MediaPipe The pipeline is implemented as a MediaPipe
@ -129,16 +130,19 @@ hip midpoints.
The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
(see figure below). (see figure below).
Please find more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
this [paper](https://arxiv.org/abs/2006.10204) and
[the model card](./models.md#pose), and the attributes in each landmark
[below](#pose_landmarks).
![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) | ![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) |
:----------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------: |
*Fig 4. 33 pose landmarks.* | *Fig 4. 33 pose landmarks.* |
Optionally, MediaPipe Pose can predict a full-body
[segmentation mask](#segmentation_mask) represented as a two-class segmentation
(human or background).
Please find more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html),
this [paper](https://arxiv.org/abs/2006.10204),
[the model card](./models.md#pose) and the [Output](#Output) section below.
## Solution APIs ## Solution APIs
### Cross-platform Configuration Options ### Cross-platform Configuration Options
@ -167,6 +171,18 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode) images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`. is also set to `true`. Default to `true`.
#### enable_segmentation
If set to `true`, in addition to the pose landmarks the solution also generates
the segmentation mask. Default to `false`.
#### smooth_segmentation
If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.
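As a rough sketch of how these two flags are enabled together in the Python Solution API (mirroring the webcam pattern shown later on this page; the confidence values and the camera index are arbitrary choices):
```python
import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose

# Sketch: enable the segmentation mask and smooth it across video frames.
with mp_pose.Pose(
    static_image_mode=False,       # video mode, so temporal filtering applies
    enable_segmentation=True,      # also produce results.segmentation_mask
    smooth_segmentation=True,      # reduce mask jitter between frames
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as pose:
  cap = cv2.VideoCapture(0)        # assumes a default webcam at index 0
  while cap.isOpened():
    success, frame = cap.read()
    if not success:
      break
    results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # results.segmentation_mask is a float mask in [0.0, 1.0] (None if no pose is found).
  cap.release()
```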
#### min_detection_confidence #### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@ -211,6 +227,19 @@ the following:
* `visibility`: Identical to that defined in the corresponding * `visibility`: Identical to that defined in the corresponding
[pose_landmarks](#pose_landmarks). [pose_landmarks](#pose_landmarks).
#### segmentation_mask
The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.
*Fig 6. Example of MediaPipe Pose segmentation mask.* |
:-----------------------------------------------------------: |
<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_segmentation.mp4" type="video/mp4"></video> |
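For illustration, a minimal Python sketch of consuming the mask, along the lines of the static-image example later on this page (the 0.1 threshold and the input/output file names are arbitrary, hypothetical choices):
```python
import cv2
import mediapipe as mp
import numpy as np

mp_pose = mp.solutions.pose
BG_COLOR = (192, 192, 192)  # gray

with mp_pose.Pose(static_image_mode=True, enable_segmentation=True) as pose:
  image = cv2.imread('person.jpg')  # hypothetical input image
  results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.segmentation_mask is not None:
    # Values close to 1.0 mean "human" pixels, values close to 0.0 mean "background".
    condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
    bg_image = np.zeros(image.shape, dtype=np.uint8)
    bg_image[:] = BG_COLOR
    segmented = np.where(condition, image, bg_image)
    cv2.imwrite('person_segmented.png', segmented)
```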
### Python Solution API ### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to Please first follow general [instructions](../getting_started/python.md) to
@ -222,6 +251,8 @@ Supported configuration options:
* [static_image_mode](#static_image_mode) * [static_image_mode](#static_image_mode)
* [model_complexity](#model_complexity) * [model_complexity](#model_complexity)
* [smooth_landmarks](#smooth_landmarks) * [smooth_landmarks](#smooth_landmarks)
* [enable_segmentation](#enable_segmentation)
* [smooth_segmentation](#smooth_segmentation)
* [min_detection_confidence](#min_detection_confidence) * [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence) * [min_tracking_confidence](#min_tracking_confidence)
@ -229,13 +260,16 @@ Supported configuration options:
import cv2 import cv2
import mediapipe as mp import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose mp_pose = mp.solutions.pose
# For static images: # For static images:
IMAGE_FILES = [] IMAGE_FILES = []
BG_COLOR = (192, 192, 192) # gray
with mp_pose.Pose( with mp_pose.Pose(
static_image_mode=True, static_image_mode=True,
model_complexity=2, model_complexity=2,
enable_segmentation=True,
min_detection_confidence=0.5) as pose: min_detection_confidence=0.5) as pose:
for idx, file in enumerate(IMAGE_FILES): for idx, file in enumerate(IMAGE_FILES):
image = cv2.imread(file) image = cv2.imread(file)
@ -250,10 +284,21 @@ with mp_pose.Pose(
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, ' f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, '
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})' f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})'
) )
# Draw pose landmarks on the image.
annotated_image = image.copy() annotated_image = image.copy()
# Draw segmentation on the image.
# To improve segmentation around boundaries, consider applying a joint
# bilateral filter to "results.segmentation_mask" with "image".
condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
bg_image = np.zeros(image.shape, dtype=np.uint8)
bg_image[:] = BG_COLOR
annotated_image = np.where(condition, annotated_image, bg_image)
# Draw pose landmarks on the image.
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) annotated_image,
results.pose_landmarks,
mp_pose.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# Plot pose world landmarks. # Plot pose world landmarks.
mp_drawing.plot_landmarks( mp_drawing.plot_landmarks(
@ -283,7 +328,10 @@ with mp_pose.Pose(
image.flags.writeable = True image.flags.writeable = True
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
mp_drawing.draw_landmarks( mp_drawing.draw_landmarks(
image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) image,
results.pose_landmarks,
mp_pose.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
cv2.imshow('MediaPipe Pose', image) cv2.imshow('MediaPipe Pose', image)
if cv2.waitKey(5) & 0xFF == 27: if cv2.waitKey(5) & 0xFF == 27:
break break
@ -300,6 +348,8 @@ Supported configuration options:
* [modelComplexity](#model_complexity) * [modelComplexity](#model_complexity)
* [smoothLandmarks](#smooth_landmarks) * [smoothLandmarks](#smooth_landmarks)
* [enableSegmentation](#enable_segmentation)
* [smoothSegmentation](#smooth_segmentation)
* [minDetectionConfidence](#min_detection_confidence) * [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence) * [minTrackingConfidence](#min_tracking_confidence)
@ -340,8 +390,20 @@ function onResults(results) {
canvasCtx.save(); canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height); canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(results.segmentationMask, 0, 0,
canvasElement.width, canvasElement.height);
// Only overwrite existing pixels.
canvasCtx.globalCompositeOperation = 'source-in';
canvasCtx.fillStyle = '#00FF00';
canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);
// Only overwrite missing pixels.
canvasCtx.globalCompositeOperation = 'destination-atop';
canvasCtx.drawImage( canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height); results.image, 0, 0, canvasElement.width, canvasElement.height);
canvasCtx.globalCompositeOperation = 'source-over';
drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS, drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
{color: '#00FF00', lineWidth: 4}); {color: '#00FF00', lineWidth: 4});
drawLandmarks(canvasCtx, results.poseLandmarks, drawLandmarks(canvasCtx, results.poseLandmarks,
@ -357,6 +419,8 @@ const pose = new Pose({locateFile: (file) => {
pose.setOptions({ pose.setOptions({
modelComplexity: 1, modelComplexity: 1,
smoothLandmarks: true, smoothLandmarks: true,
enableSegmentation: true,
smoothSegmentation: true,
minDetectionConfidence: 0.5, minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5 minTrackingConfidence: 0.5
}); });

View File

@ -64,8 +64,9 @@ std::string ToString(GateState state) {
// ALLOW or DISALLOW can also be specified as an input side packet. The rules // ALLOW or DISALLOW can also be specified as an input side packet. The rules
// for evaluation remain the same as above. // for evaluation remain the same as above.
// //
// ALLOW/DISALLOW inputs must be specified either using input stream or // ALLOW/DISALLOW inputs must be specified either using input stream or via
// via input side packet but not both. // input side packet but not both. If neither is specified, the behavior is then
// determined by the "allow" field in the calculator options.
// //
// Intended to be used with the default input stream handler, which synchronizes // Intended to be used with the default input stream handler, which synchronizes
// all data input streams with the ALLOW/DISALLOW control input stream. // all data input streams with the ALLOW/DISALLOW control input stream.
@ -92,20 +93,22 @@ class GateCalculator : public CalculatorBase {
cc->InputSidePackets().HasTag(kDisallowTag); cc->InputSidePackets().HasTag(kDisallowTag);
bool input_via_stream = bool input_via_stream =
cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag); cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag);
// Only one of input_side_packet or input_stream may specify ALLOW/DISALLOW
// input.
RET_CHECK(input_via_side_packet ^ input_via_stream);
// Only one of input_side_packet or input_stream may specify
// ALLOW/DISALLOW input.
if (input_via_side_packet) { if (input_via_side_packet) {
RET_CHECK(!input_via_stream);
RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^ RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^
cc->InputSidePackets().HasTag(kDisallowTag)); cc->InputSidePackets().HasTag(kDisallowTag));
if (cc->InputSidePackets().HasTag(kAllowTag)) { if (cc->InputSidePackets().HasTag(kAllowTag)) {
cc->InputSidePackets().Tag(kAllowTag).Set<bool>(); cc->InputSidePackets().Tag(kAllowTag).Set<bool>().Optional();
} else { } else {
cc->InputSidePackets().Tag(kDisallowTag).Set<bool>(); cc->InputSidePackets().Tag(kDisallowTag).Set<bool>().Optional();
} }
} else { }
if (input_via_stream) {
RET_CHECK(!input_via_side_packet);
RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^ RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^
cc->Inputs().HasTag(kDisallowTag)); cc->Inputs().HasTag(kDisallowTag));
@ -139,7 +142,6 @@ class GateCalculator : public CalculatorBase {
} }
absl::Status Open(CalculatorContext* cc) final { absl::Status Open(CalculatorContext* cc) final {
use_side_packet_for_allow_disallow_ = false;
if (cc->InputSidePackets().HasTag(kAllowTag)) { if (cc->InputSidePackets().HasTag(kAllowTag)) {
use_side_packet_for_allow_disallow_ = true; use_side_packet_for_allow_disallow_ = true;
allow_by_side_packet_decision_ = allow_by_side_packet_decision_ =
@ -158,12 +160,20 @@ class GateCalculator : public CalculatorBase {
const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>(); const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>();
empty_packets_as_allow_ = options.empty_packets_as_allow(); empty_packets_as_allow_ = options.empty_packets_as_allow();
if (!use_side_packet_for_allow_disallow_ &&
!cc->Inputs().HasTag(kAllowTag) && !cc->Inputs().HasTag(kDisallowTag)) {
use_option_for_allow_disallow_ = true;
allow_by_option_decision_ = options.allow();
}
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status Process(CalculatorContext* cc) final { absl::Status Process(CalculatorContext* cc) final {
bool allow = empty_packets_as_allow_; bool allow = empty_packets_as_allow_;
if (use_side_packet_for_allow_disallow_) { if (use_option_for_allow_disallow_) {
allow = allow_by_option_decision_;
} else if (use_side_packet_for_allow_disallow_) {
allow = allow_by_side_packet_decision_; allow = allow_by_side_packet_decision_;
} else { } else {
if (cc->Inputs().HasTag(kAllowTag) && if (cc->Inputs().HasTag(kAllowTag) &&
@ -217,8 +227,10 @@ class GateCalculator : public CalculatorBase {
GateState last_gate_state_ = GATE_UNINITIALIZED; GateState last_gate_state_ = GATE_UNINITIALIZED;
int num_data_streams_; int num_data_streams_;
bool empty_packets_as_allow_; bool empty_packets_as_allow_;
bool use_side_packet_for_allow_disallow_; bool use_side_packet_for_allow_disallow_ = false;
bool allow_by_side_packet_decision_; bool allow_by_side_packet_decision_;
bool use_option_for_allow_disallow_ = false;
bool allow_by_option_decision_;
}; };
REGISTER_CALCULATOR(GateCalculator); REGISTER_CALCULATOR(GateCalculator);

View File

@ -29,4 +29,8 @@ message GateCalculatorOptions {
// disallowing the corresponding packets in the data input streams. Setting // disallowing the corresponding packets in the data input streams. Setting
// this option to true inverts that, allowing the data packets to go through. // this option to true inverts that, allowing the data packets to go through.
optional bool empty_packets_as_allow = 1; optional bool empty_packets_as_allow = 1;
// Whether to allow or disallow the input streams to pass when no
// ALLOW/DISALLOW input or side input is specified.
optional bool allow = 2 [default = false];
} }

View File

@ -113,6 +113,68 @@ TEST_F(GateCalculatorTest, InvalidInputs) {
)"))); )")));
} }
TEST_F(GateCalculatorTest, AllowByALLOWOptionToTrue) {
SetRunner(R"(
calculator: "GateCalculator"
input_stream: "test_input"
output_stream: "test_output"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
)");
constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true);
constexpr int64 kTimestampValue1 = 43;
RunTimeStep(kTimestampValue1, false);
const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
ASSERT_EQ(2, output.size());
EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value());
EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value());
EXPECT_EQ(true, output[0].Get<bool>());
EXPECT_EQ(false, output[1].Get<bool>());
}
TEST_F(GateCalculatorTest, DisallowByALLOWOptionSetToFalse) {
SetRunner(R"(
calculator: "GateCalculator"
input_stream: "test_input"
output_stream: "test_output"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: false
}
}
)");
constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true);
constexpr int64 kTimestampValue1 = 43;
RunTimeStep(kTimestampValue1, false);
const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
ASSERT_EQ(0, output.size());
}
TEST_F(GateCalculatorTest, DisallowByALLOWOptionNotSet) {
SetRunner(R"(
calculator: "GateCalculator"
input_stream: "test_input"
output_stream: "test_output"
)");
constexpr int64 kTimestampValue0 = 42;
RunTimeStep(kTimestampValue0, true);
constexpr int64 kTimestampValue1 = 43;
RunTimeStep(kTimestampValue1, false);
const std::vector<Packet>& output = runner()->Outputs().Get("", 0).packets;
ASSERT_EQ(0, output.size());
}
TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) {
  SetRunner(R"(
        calculator: "GateCalculator"


@ -661,3 +661,138 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/port:parse_text_proto",
], ],
) )
cc_library(
name = "affine_transformation",
hdrs = ["affine_transformation.h"],
deps = ["@com_google_absl//absl/status:statusor"],
)
cc_library(
name = "affine_transformation_runner_gl",
srcs = ["affine_transformation_runner_gl.cc"],
hdrs = ["affine_transformation_runner_gl.h"],
deps = [
":affine_transformation",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_origin_cc_proto",
"//mediapipe/gpu:shader_util",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@eigen_archive//:eigen3",
],
)
cc_library(
name = "affine_transformation_runner_opencv",
srcs = ["affine_transformation_runner_opencv.cc"],
hdrs = ["affine_transformation_runner_opencv.h"],
deps = [
":affine_transformation",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status:statusor",
"@eigen_archive//:eigen3",
],
)
mediapipe_proto_library(
name = "warp_affine_calculator_proto",
srcs = ["warp_affine_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
"//mediapipe/gpu:gpu_origin_proto",
],
)
cc_library(
name = "warp_affine_calculator",
srcs = ["warp_affine_calculator.cc"],
hdrs = ["warp_affine_calculator.h"],
visibility = ["//visibility:public"],
deps = [
":affine_transformation",
":affine_transformation_runner_opencv",
":warp_affine_calculator_cc_proto",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
":affine_transformation_runner_gl",
],
}),
alwayslink = 1,
)
cc_test(
name = "warp_affine_calculator_test",
srcs = ["warp_affine_calculator_test.cc"],
data = [
"//mediapipe/calculators/tensor:testdata/image_to_tensor/input.jpg",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation_border_zero.png",
"//mediapipe/calculators/tensor:testdata/image_to_tensor/noop_except_range.png",
],
tags = ["desktop_only_test"],
deps = [
":affine_transformation",
":warp_affine_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_converter",
"//mediapipe/calculators/tensor:image_to_tensor_utils",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/calculators/util:to_image_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
)


@ -0,0 +1,55 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
#include <array>
#include "absl/status/statusor.h"
namespace mediapipe {
class AffineTransformation {
public:
// Pixel extrapolation method.
// When converting image to tensor it may happen that tensor needs to read
// pixels outside image boundaries. Border mode helps to specify how such
// pixels will be calculated.
enum class BorderMode { kZero, kReplicate };
struct Size {
int width;
int height;
};
template <typename InputT, typename OutputT>
class Runner {
public:
virtual ~Runner() = default;
// Transforms input into output using @matrix as following:
// output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
// matrix[4] * x + matrix[5] * y + matrix[7])
// where x and y ranges are defined by @output_size.
virtual absl::StatusOr<OutputT> Run(const InputT& input,
const std::array<float, 16>& matrix,
const Size& output_size,
BorderMode border_mode) = 0;
};
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_
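For reference, the mapping documented for AffineTransformation::Runner::Run above can be restated as a single matrix expression. With m_i denoting matrix[i] of the row-major 4x4 matrix and (x, y) ranging over output_size:

\[
\text{output}(x, y) = \text{input}(x_s, y_s), \qquad
\begin{pmatrix} x_s \\ y_s \end{pmatrix} =
\begin{pmatrix} m_0 & m_1 & m_3 \\ m_4 & m_5 & m_7 \end{pmatrix}
\begin{pmatrix} x \\ y \\ 1 \end{pmatrix},
\]

i.e. the z-related entries (m_2, m_6, and the third row) do not participate in the 2-D case described by the comment.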


@ -0,0 +1,354 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#include <memory>
#include <optional>
#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/gpu/shader_util.h"
namespace mediapipe {
namespace {
using mediapipe::GlCalculatorHelper;
using mediapipe::GlhCreateProgram;
using mediapipe::GlTexture;
using mediapipe::GpuBuffer;
using mediapipe::GpuOrigin;
bool IsMatrixVerticalFlipNeeded(GpuOrigin::Mode gpu_origin) {
switch (gpu_origin) {
case GpuOrigin::DEFAULT:
case GpuOrigin::CONVENTIONAL:
#ifdef __APPLE__
return false;
#else
return true;
#endif // __APPLE__
case GpuOrigin::TOP_LEFT:
return false;
}
}
#ifdef __APPLE__
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 0
#else
#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 1
#endif // __APPLE__
bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
return gl_context.gl_major_version() > 3 ||
(gl_context.gl_major_version() == 3 &&
gl_context.gl_minor_version() >= 2);
}
constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;
class GlTextureWarpAffineRunner
: public AffineTransformation::Runner<GpuBuffer,
std::unique_ptr<GpuBuffer>> {
public:
GlTextureWarpAffineRunner(std::shared_ptr<GlCalculatorHelper> gl_helper,
GpuOrigin::Mode gpu_origin)
: gl_helper_(gl_helper), gpu_origin_(gpu_origin) {}
absl::Status Init() {
return gl_helper_->RunInGlContext([this]() -> absl::Status {
const GLint attr_location[kNumAttributes] = {
kAttribVertex,
kAttribTexturePosition,
};
const GLchar* attr_name[kNumAttributes] = {
"position",
"texture_coordinate",
};
constexpr GLchar kVertShader[] = R"(
in vec4 position;
in mediump vec4 texture_coordinate;
out mediump vec2 sample_coordinate;
uniform mat4 transform_matrix;
void main() {
gl_Position = position;
vec4 tc = transform_matrix * texture_coordinate;
sample_coordinate = tc.xy;
}
)";
constexpr GLchar kFragShader[] = R"(
DEFAULT_PRECISION(mediump, float)
in vec2 sample_coordinate;
uniform sampler2D input_texture;
#ifdef GL_ES
#define fragColor gl_FragColor
#else
out vec4 fragColor;
#endif // defined(GL_ES);
void main() {
vec4 color = texture2D(input_texture, sample_coordinate);
#ifdef CUSTOM_ZERO_BORDER_MODE
float out_of_bounds =
float(sample_coordinate.x < 0.0 || sample_coordinate.x > 1.0 ||
sample_coordinate.y < 0.0 || sample_coordinate.y > 1.0);
color = mix(color, vec4(0.0, 0.0, 0.0, 0.0), out_of_bounds);
#endif // defined(CUSTOM_ZERO_BORDER_MODE)
fragColor = color;
}
)";
// Create program and set parameters.
auto create_fn = [&](const std::string& vs,
const std::string& fs) -> absl::StatusOr<Program> {
GLuint program = 0;
GlhCreateProgram(vs.c_str(), fs.c_str(), kNumAttributes, &attr_name[0],
attr_location, &program);
RET_CHECK(program) << "Problem initializing warp affine program.";
glUseProgram(program);
glUniform1i(glGetUniformLocation(program, "input_texture"), 1);
GLint matrix_id = glGetUniformLocation(program, "transform_matrix");
return Program{.id = program, .matrix_id = matrix_id};
};
const std::string vert_src =
absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble, kVertShader);
const std::string frag_src = absl::StrCat(
mediapipe::kMediaPipeFragmentShaderPreamble, kFragShader);
ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));
auto create_custom_zero_fn = [&]() -> absl::StatusOr<Program> {
std::string custom_zero_border_mode_def = R"(
#define CUSTOM_ZERO_BORDER_MODE
)";
const std::string frag_custom_zero_src =
absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
custom_zero_border_mode_def, kFragShader);
return create_fn(vert_src, frag_custom_zero_src);
};
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) {
ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
}
#else
ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
glGenFramebuffers(1, &framebuffer_);
// vertex storage
glGenBuffers(2, vbo_);
glGenVertexArrays(1, &vao_);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
return absl::OkStatus();
});
}
absl::StatusOr<std::unique_ptr<GpuBuffer>> Run(
const GpuBuffer& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
std::unique_ptr<GpuBuffer> gpu_buffer;
MP_RETURN_IF_ERROR(
gl_helper_->RunInGlContext([this, &input, &matrix, &size, &border_mode,
&gpu_buffer]() -> absl::Status {
auto input_texture = gl_helper_->CreateSourceTexture(input);
auto output_texture = gl_helper_->CreateDestinationTexture(
size.width, size.height, input.format());
MP_RETURN_IF_ERROR(
RunInternal(input_texture, matrix, border_mode, &output_texture));
gpu_buffer = output_texture.GetFrame<GpuBuffer>();
return absl::OkStatus();
}));
return gpu_buffer;
}
absl::Status RunInternal(const GlTexture& texture,
const std::array<float, 16>& matrix,
AffineTransformation::BorderMode border_mode,
GlTexture* output) {
glDisable(GL_DEPTH_TEST);
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
glViewport(0, 0, output->width(), output->height());
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, output->name());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
output->name(), 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(texture.target(), texture.name());
// a) Filtering.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// b) Clamping.
std::optional<Program> program = program_;
switch (border_mode) {
case AffineTransformation::BorderMode::kReplicate: {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
break;
}
case AffineTransformation::BorderMode::kZero: {
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
if (program_custom_zero_) {
program = program_custom_zero_;
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR,
std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f}.data());
}
#else
RET_CHECK(program_custom_zero_)
<< "Program must have been initialized.";
program = program_custom_zero_;
#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
break;
}
}
glUseProgram(program->id);
Eigen::Matrix<float, 4, 4, Eigen::RowMajor> eigen_mat(matrix.data());
if (IsMatrixVerticalFlipNeeded(gpu_origin_)) {
// @matrix describes affine transformation in terms of TOP LEFT origin, so
// in some cases/on some platforms an extra flipping should be done before
// and after.
const Eigen::Matrix<float, 4, 4, Eigen::RowMajor> flip_y(
{{1.0f, 0.0f, 0.0f, 0.0f},
{0.0f, -1.0f, 0.0f, 1.0f},
{0.0f, 0.0f, 1.0f, 0.0f},
{0.0f, 0.0f, 0.0f, 1.0f}});
eigen_mat = flip_y * eigen_mat * flip_y;
}
// If GL context is ES2, then GL_FALSE must be used for 'transpose'
// GLboolean in glUniformMatrix4fv, or else INVALID_VALUE error is reported.
// Hence, transposing the matrix and always passing transposed.
eigen_mat.transposeInPlace();
glUniformMatrix4fv(program->matrix_id, 1, GL_FALSE, eigen_mat.data());
// vao
glBindVertexArray(vao_);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
glEnableVertexAttribArray(kAttribVertex);
glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
glEnableVertexAttribArray(kAttribTexturePosition);
glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);
// draw
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Resetting to MediaPipe texture param defaults.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glDisableVertexAttribArray(kAttribVertex);
glDisableVertexAttribArray(kAttribTexturePosition);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
return absl::OkStatus();
}
~GlTextureWarpAffineRunner() override {
gl_helper_->RunInGlContext([this]() {
// Release OpenGL resources.
if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
if (program_.id != 0) glDeleteProgram(program_.id);
if (program_custom_zero_ && program_custom_zero_->id != 0) {
glDeleteProgram(program_custom_zero_->id);
}
if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
glDeleteBuffers(2, vbo_);
});
}
private:
struct Program {
GLuint id;
GLint matrix_id;
};
std::shared_ptr<GlCalculatorHelper> gl_helper_;
GpuOrigin::Mode gpu_origin_;
GLuint vao_ = 0;
GLuint vbo_[2] = {0, 0};
Program program_;
std::optional<Program> program_custom_zero_;
GLuint framebuffer_ = 0;
};
#undef GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
} // namespace
absl::StatusOr<std::unique_ptr<
AffineTransformation::Runner<GpuBuffer, std::unique_ptr<GpuBuffer>>>>
CreateAffineTransformationGlRunner(
std::shared_ptr<GlCalculatorHelper> gl_helper, GpuOrigin::Mode gpu_origin) {
auto runner =
absl::make_unique<GlTextureWarpAffineRunner>(gl_helper, gpu_origin);
MP_RETURN_IF_ERROR(runner->Init());
return runner;
}
} // namespace mediapipe
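For reference, the vertical-flip handling in RunInternal amounts to conjugating the user-provided matrix with a Y-flip in normalized texture coordinates before it is transposed and uploaded:

\[
M' = F\,M\,F, \qquad
F = \begin{pmatrix}
1 & 0 & 0 & 0\\
0 & -1 & 0 & 1\\
0 & 0 & 1 & 0\\
0 & 0 & 0 & 1
\end{pmatrix},
\]

where F maps y to 1 - y, so a matrix expressed for a top-left origin also samples correctly when the GPU origin is bottom-left.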


@ -0,0 +1,36 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
namespace mediapipe {
absl::StatusOr<std::unique_ptr<AffineTransformation::Runner<
mediapipe::GpuBuffer, std::unique_ptr<mediapipe::GpuBuffer>>>>
CreateAffineTransformationGlRunner(
std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper,
mediapipe::GpuOrigin::Mode gpu_origin);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_


@ -0,0 +1,160 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include <memory>
#include "absl/memory/memory.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace {
cv::BorderTypes GetBorderModeForOpenCv(
AffineTransformation::BorderMode border_mode) {
switch (border_mode) {
case AffineTransformation::BorderMode::kZero:
return cv::BORDER_CONSTANT;
case AffineTransformation::BorderMode::kReplicate:
return cv::BORDER_REPLICATE;
}
}
class OpenCvRunner
: public AffineTransformation::Runner<ImageFrame, ImageFrame> {
public:
absl::StatusOr<ImageFrame> Run(
const ImageFrame& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
    // OpenCV warpAffine works in absolute coordinates, so the transform (which
// accepts and produces relative coordinates) should be adjusted to first
// normalize coordinates and then scale them.
// clang-format off
cv::Matx44f normalize_dst_coordinate({
1.0f / size.width, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f / size.height, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
cv::Matx44f scale_src_coordinate({
1.0f * input.Width(), 0.0f, 0.0f, 0.0f,
0.0f, 1.0f * input.Height(), 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
// clang-format on
cv::Matx44f adjust_dst_coordinate;
cv::Matx44f adjust_src_coordinate;
// TODO: update to always use accurate implementation.
constexpr bool kOpenCvCompatibility = true;
if (kOpenCvCompatibility) {
adjust_dst_coordinate = normalize_dst_coordinate;
adjust_src_coordinate = scale_src_coordinate;
} else {
// To do an accurate affine image transformation and make "on-cpu" and
// "on-gpu" calculations aligned - extra offset is required to select
// correct pixels.
//
      // Each destination pixel corresponds to some region of pixels in the
      // source image. (In case of downscaling there can be more than one
      // pixel.) The offset for x and y is calculated in such a way that the
      // pixel in the middle of the region is selected.
      //
      // For simplicity's sake, let's consider downscaling from 100x50 to 10x10
// without a rotation:
// 1. Each destination pixel corresponds to 10x5 region
// X range: [0, .. , 9]
// Y range: [0, .. , 4]
// 2. Considering we have __discrete__ pixels, the center of the region is
// between (4, 2) and (5, 2) pixels, let's assume it's a "pixel"
// (4.5, 2).
      // 3. When using the above as an offset for every pixel selected while
// downscaling, resulting pixels are:
// (4.5, 2), (14.5, 2), .. , (94.5, 2)
// (4.5, 7), (14.5, 7), .. , (94.5, 7)
// ..
// (4.5, 47), (14.5, 47), .., (94.5, 47)
// instead of:
// (0, 0), (10, 0), .. , (90, 0)
      //      (0, 5), (10, 5), .. , (90, 5)
// ..
// (0, 45), (10, 45), .., (90, 45)
// The latter looks shifted.
//
// Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to
// the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence,
// transformation matrix should shift coordinates by (0.5, 0.5) as the
// very first step.
//
// Due to the above shift, transformed coordinates would be valid for
// float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0).
      // To make it valid for __discrete__ pixels, the transformation matrix should
// shift coordinate by (-0.5f, -0.5f) as the very last step. (E.g. if we
// get (0.5f, 0.5f), then it's (0, 0) __discrete__ pixel.)
// clang-format off
cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f,
0.0f, 1.0f, 0.0f, 0.5f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f,
0.0f, 1.0f, 0.0f, -0.5f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f});
// clang-format on
adjust_dst_coordinate = normalize_dst_coordinate * shift_dst;
adjust_src_coordinate = shift_src * scale_src_coordinate;
}
cv::Matx44f transform(matrix.data());
cv::Matx44f transform_absolute =
adjust_src_coordinate * transform * adjust_dst_coordinate;
cv::Mat in_mat = formats::MatView(&input);
cv::Mat cv_affine_transform(2, 3, CV_32F);
cv_affine_transform.at<float>(0, 0) = transform_absolute.val[0];
cv_affine_transform.at<float>(0, 1) = transform_absolute.val[1];
cv_affine_transform.at<float>(0, 2) = transform_absolute.val[3];
cv_affine_transform.at<float>(1, 0) = transform_absolute.val[4];
cv_affine_transform.at<float>(1, 1) = transform_absolute.val[5];
cv_affine_transform.at<float>(1, 2) = transform_absolute.val[7];
ImageFrame out_image(input.Format(), size.width, size.height);
cv::Mat out_mat = formats::MatView(&out_image);
cv::warpAffine(in_mat, out_mat, cv_affine_transform,
cv::Size(out_mat.cols, out_mat.rows),
/*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP,
GetBorderModeForOpenCv(border_mode));
return out_image;
}
};
} // namespace
absl::StatusOr<
std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner() {
return absl::make_unique<OpenCvRunner>();
}
} // namespace mediapipe
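For reference, with kOpenCvCompatibility enabled the runner above converts the relative-coordinate transform T into absolute pixel coordinates by composing it with the normalization and scaling matrices:

\[
T_{abs} = S_{src}\,T\,N_{dst}, \qquad
N_{dst} = \operatorname{diag}\!\big(\tfrac{1}{w_{out}}, \tfrac{1}{h_{out}}, 1, 1\big), \quad
S_{src} = \operatorname{diag}(w_{in}, h_{in}, 1, 1),
\]

and the 2x3 matrix built from entries (0, 1, 3) and (4, 5, 7) of T_abs is what gets passed to cv::warpAffine together with WARP_INVERSE_MAP.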


@ -0,0 +1,32 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/framework/formats/image_frame.h"
namespace mediapipe {
absl::StatusOr<
std::unique_ptr<AffineTransformation::Runner<ImageFrame, ImageFrame>>>
CreateAffineTransformationOpenCvRunner();
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_
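A minimal usage sketch for the factory declared above. This is illustrative only: IdentityWarp is a hypothetical helper (not part of this change), error handling is simplified, and it assumes only the headers introduced here plus MediaPipe's ImageFrame.

#include <array>

#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/framework/formats/image_frame.h"

// Hypothetical helper: resamples `input` into an out_width x out_height frame
// using the identity transform in relative coordinates.
absl::StatusOr<mediapipe::ImageFrame> IdentityWarp(
    const mediapipe::ImageFrame& input, int out_width, int out_height) {
  auto runner_or = mediapipe::CreateAffineTransformationOpenCvRunner();
  if (!runner_or.ok()) return runner_or.status();
  // Row-major 4x4 matrix, as expected by AffineTransformation::Runner::Run.
  const std::array<float, 16> matrix = {1, 0, 0, 0,  //
                                        0, 1, 0, 0,  //
                                        0, 0, 1, 0,  //
                                        0, 0, 0, 1};
  const mediapipe::AffineTransformation::Size size{out_width, out_height};
  return (*runner_or)->Run(
      input, matrix, size,
      mediapipe::AffineTransformation::BorderMode::kReplicate);
}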


@ -262,6 +262,7 @@ absl::Status ScaleImageCalculator::InitializeFrameInfo(CalculatorContext* cc) {
      scale_image::FindOutputDimensions(crop_width_, crop_height_,        //
                                        options_.target_width(),          //
                                        options_.target_height(),         //
+                                       options_.target_max_area(),       //
                                        options_.preserve_aspect_ratio(), //
                                        options_.scale_to_multiple_of(),  //
                                        &output_width_, &output_height_));


@ -28,6 +28,11 @@ message ScaleImageCalculatorOptions {
  optional int32 target_width = 1;
  optional int32 target_height = 2;
+ // If set, then automatically calculates a target_width and target_height
+ // that have an area below the target max area. Aspect ratio preservation
+ // cannot be disabled.
+ optional int32 target_max_area = 15;
  // If true, the image is scaled up or down proportionally so that it
  // fits inside the box represented by target_width and target_height.
  // Otherwise it is scaled to fit target_width and target_height


@ -92,12 +92,21 @@ absl::Status FindOutputDimensions(int input_width, //
                                  int input_height,            //
                                  int target_width,            //
                                  int target_height,           //
+                                 int target_max_area,         //
                                  bool preserve_aspect_ratio,  //
                                  int scale_to_multiple_of,    //
                                  int* output_width, int* output_height) {
  CHECK(output_width);
  CHECK(output_height);
+ if (target_max_area > 0 && input_width * input_height > target_max_area) {
+   preserve_aspect_ratio = true;
+   target_height = static_cast<int>(sqrt(static_cast<double>(target_max_area) /
+                                         (static_cast<double>(input_width) /
+                                          static_cast<double>(input_height))));
+   target_width = -1;  // Resize width to preserve aspect ratio.
+ }
  if (preserve_aspect_ratio) {
    RET_CHECK(scale_to_multiple_of == 2)
        << "FindOutputDimensions always outputs width and height that are "
@ -164,5 +173,17 @@ absl::Status FindOutputDimensions(int input_width, //
        << "Unable to set output dimensions based on target dimensions.";
}
+ absl::Status FindOutputDimensions(int input_width,             //
+                                   int input_height,            //
+                                   int target_width,            //
+                                   int target_height,           //
+                                   bool preserve_aspect_ratio,  //
+                                   int scale_to_multiple_of,    //
+                                   int* output_width, int* output_height) {
+   return FindOutputDimensions(
+       input_width, input_height, target_width, target_height, -1,
+       preserve_aspect_ratio, scale_to_multiple_of, output_width, output_height);
+ }
} // namespace scale_image
} // namespace mediapipe
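In other words, when the input area exceeds target_max_area, the added branch picks a target height from the requested area and the input aspect ratio, and lets the width follow from aspect-ratio preservation:

\[
\text{target\_height} = \Big\lfloor \sqrt{\tfrac{\text{target\_max\_area}}{\,w_{in}/h_{in}\,}} \Big\rfloor, \qquad \text{target\_width} = -1,
\]

so, for example, a 200x100 input with target_max_area = 9000 yields target_height = floor(sqrt(4500)) = 67, and the width is then derived from the aspect ratio by the existing preserve_aspect_ratio handling in the same function.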


@ -34,15 +34,25 @@ absl::Status FindCropDimensions(int input_width, int input_height, //
                                int* crop_width, int* crop_height,  //
                                int* col_start, int* row_start);
- // Given an input width and height, a target width and height, whether to
- // preserve the aspect ratio, and whether to round-down to the multiple of a
- // given number nearest to the targets, determine the output width and height.
- // If target_width or target_height is non-positive, then they will be set to
- // the input_width and input_height respectively. If scale_to_multiple_of is
- // less than 1, it will be treated like 1. The output_width and
- // output_height will be reduced as necessary to preserve_aspect_ratio if the
- // option is specified. If preserving the aspect ratio is desired, you must set
- // scale_to_multiple_of to 2.
+ // Given an input width and height, a target width and height or max area,
+ // whether to preserve the aspect ratio, and whether to round-down to the
+ // multiple of a given number nearest to the targets, determine the output width
+ // and height. If target_width or target_height is non-positive, then they will
+ // be set to the input_width and input_height respectively. If target_area is
+ // non-positive, then it will be ignored. If scale_to_multiple_of is less than
+ // 1, it will be treated like 1. The output_width and output_height will be
+ // reduced as necessary to preserve_aspect_ratio if the option is specified. If
+ // preserving the aspect ratio is desired, you must set scale_to_multiple_of
+ // to 2.
+ absl::Status FindOutputDimensions(int input_width, int input_height,  //
+                                   int target_width,
+                                   int target_height,    //
+                                   int target_max_area,  //
+                                   bool preserve_aspect_ratio,  //
+                                   int scale_to_multiple_of,    //
+                                   int* output_width, int* output_height);
+ // Backwards compatible helper.
  absl::Status FindOutputDimensions(int input_width, int input_height,  //
                                    int target_width,
                                    int target_height,  //


@ -79,49 +79,49 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsPreserveRatio) {
  int output_width;
  int output_height;
  // Not scale.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(100, output_height);
  // Not scale with odd input size.
- MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, false, 1, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, -1, false, 1, &output_width, &output_height));
  EXPECT_EQ(201, output_width);
  EXPECT_EQ(101, output_height);
  // Scale down by 1/2.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale up, doubling dimensions.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(400, output_width);
  EXPECT_EQ(200, output_height);
  // Fits a 2:1 image into a 150 x 150 box. Output dimensions are always
  // divisible by 2.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(150, output_width);
  EXPECT_EQ(74, output_height);
  // Fits a 2:1 image into a 400 x 50 box.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale to multiple number with odd target size.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale to multiple number with odd target size.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(50, output_height);
  // Scale to odd size.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, false, 1, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, -1, false, 1, &output_width, &output_height));
  EXPECT_EQ(151, output_width);
  EXPECT_EQ(101, output_height);
}
@ -131,18 +131,18 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsNoAspectRatio) {
  int output_width;
  int output_height;
  // Scale width only.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 2, &output_width, &output_height));
  EXPECT_EQ(100, output_width);
  EXPECT_EQ(100, output_height);
  // Scale height only.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, false, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, false, 2, &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(200, output_height);
  // Scale both dimensions.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 2, &output_width, &output_height));
  EXPECT_EQ(150, output_width);
  EXPECT_EQ(200, output_height);
}
@ -152,41 +152,78 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsDownScaleToMultipleOf) {
  int output_width;
  int output_height;
  // Set no targets, downscale to a multiple of 8.
- MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, false, 8, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, -1, false, 8, &output_width, &output_height));
  EXPECT_EQ(96, output_width);
  EXPECT_EQ(96, output_height);
  // Set width target, downscale to a multiple of 8.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 8, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 8, &output_width, &output_height));
  EXPECT_EQ(96, output_width);
  EXPECT_EQ(96, output_height);
  // Set height target, downscale to a multiple of 8.
- MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, false, 8, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, -1, false, 8, &output_width, &output_height));
  EXPECT_EQ(200, output_width);
  EXPECT_EQ(200, output_height);
  // Set both targets, downscale to a multiple of 8.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 8, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 8, &output_width, &output_height));
  EXPECT_EQ(144, output_width);
  EXPECT_EQ(200, output_height);
  // Doesn't throw error if keep aspect is true and downscale multiple is 2.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, true, 2, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, -1, true, 2, &output_width, &output_height));
  EXPECT_EQ(400, output_width);
  EXPECT_EQ(200, output_height);
  // Throws error if keep aspect is true, but downscale multiple is not 2.
- ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, true, 4, &output_width, &output_height),
+ ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, -1, true, 4, &output_width, &output_height),
              testing::Not(testing::status::IsOk()));
  // Downscaling to multiple ignored if multiple is less than 2.
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, false, 1, &output_width, &output_height));
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, -1, false, 1, &output_width, &output_height));
  EXPECT_EQ(401, output_width);
  EXPECT_EQ(201, output_height);
}
// Tests scaling to fit a maximum output area.
TEST(ScaleImageUtilsTest, FindOutputDimensionsMaxArea) {
int output_width;
int output_height;
// Smaller area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 9000, false, 2,
&output_width, &output_height));
EXPECT_NEAR(
200 / 100,
static_cast<double>(output_width) / static_cast<double>(output_height),
0.1f);
EXPECT_LE(output_width * output_height, 9000);
// Close to original area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 19999, false, 2,
&output_width, &output_height));
EXPECT_NEAR(
200.0 / 100.0,
static_cast<double>(output_width) / static_cast<double>(output_height),
0.1f);
EXPECT_LE(output_width * output_height, 19999);
// Don't scale with larger area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20001, false, 2,
&output_width, &output_height));
EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height);
// Don't scale with equal area.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20000, false, 2,
&output_width, &output_height));
EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height);
// Don't scale at all.
MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, false, 2,
&output_width, &output_height));
EXPECT_EQ(200, output_width);
EXPECT_EQ(100, output_height);
}
} // namespace
} // namespace scale_image
} // namespace mediapipe


@ -0,0 +1,211 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/image/warp_affine_calculator.h"
#include <array>
#include <cstdint>
#include <memory>
#include "mediapipe/calculators/image/affine_transformation.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
#endif // !MEDIAPIPE_DISABLE_GPU
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
#include "mediapipe/calculators/image/warp_affine_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/ret_check.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
namespace {
AffineTransformation::BorderMode GetBorderMode(
mediapipe::WarpAffineCalculatorOptions::BorderMode border_mode) {
switch (border_mode) {
case mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO:
return AffineTransformation::BorderMode::kZero;
case mediapipe::WarpAffineCalculatorOptions::BORDER_UNSPECIFIED:
case mediapipe::WarpAffineCalculatorOptions::BORDER_REPLICATE:
return AffineTransformation::BorderMode::kReplicate;
}
}
template <typename ImageT>
class WarpAffineRunnerHolder {};
template <>
class WarpAffineRunnerHolder<ImageFrame> {
public:
using RunnerType = AffineTransformation::Runner<ImageFrame, ImageFrame>;
absl::Status Open(CalculatorContext* cc) { return absl::OkStatus(); }
absl::StatusOr<RunnerType*> GetRunner() {
if (!runner_) {
ASSIGN_OR_RETURN(runner_, CreateAffineTransformationOpenCvRunner());
}
return runner_.get();
}
private:
std::unique_ptr<RunnerType> runner_;
};
#if !MEDIAPIPE_DISABLE_GPU
template <>
class WarpAffineRunnerHolder<mediapipe::GpuBuffer> {
public:
using RunnerType =
AffineTransformation::Runner<mediapipe::GpuBuffer,
std::unique_ptr<mediapipe::GpuBuffer>>;
absl::Status Open(CalculatorContext* cc) {
gpu_origin_ =
cc->Options<mediapipe::WarpAffineCalculatorOptions>().gpu_origin();
gl_helper_ = std::make_shared<mediapipe::GlCalculatorHelper>();
return gl_helper_->Open(cc);
}
absl::StatusOr<RunnerType*> GetRunner() {
if (!runner_) {
ASSIGN_OR_RETURN(
runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_));
}
return runner_.get();
}
private:
mediapipe::GpuOrigin::Mode gpu_origin_;
std::shared_ptr<mediapipe::GlCalculatorHelper> gl_helper_;
std::unique_ptr<RunnerType> runner_;
};
#endif // !MEDIAPIPE_DISABLE_GPU
template <>
class WarpAffineRunnerHolder<mediapipe::Image> {
public:
absl::Status Open(CalculatorContext* cc) { return runner_.Open(cc); }
absl::StatusOr<
AffineTransformation::Runner<mediapipe::Image, mediapipe::Image>*>
GetRunner() {
return &runner_;
}
private:
class Runner : public AffineTransformation::Runner<mediapipe::Image,
mediapipe::Image> {
public:
absl::Status Open(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(cpu_holder_.Open(cc));
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::StatusOr<mediapipe::Image> Run(
const mediapipe::Image& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
if (input.UsesGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix,
size, border_mode));
return mediapipe::Image(*result);
#else
return absl::UnavailableError("GPU support is disabled");
#endif // !MEDIAPIPE_DISABLE_GPU
}
ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
const auto& frame_ptr = input.GetImageFrameSharedPtr();
// Wrap image into image frame.
const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(),
frame_ptr->Height(), frame_ptr->WidthStep(),
const_cast<uint8_t*>(frame_ptr->PixelData()),
[](uint8* data) {});
ASSIGN_OR_RETURN(auto result,
runner->Run(image_frame, matrix, size, border_mode));
return mediapipe::Image(std::make_shared<ImageFrame>(std::move(result)));
}
private:
WarpAffineRunnerHolder<ImageFrame> cpu_holder_;
#if !MEDIAPIPE_DISABLE_GPU
WarpAffineRunnerHolder<mediapipe::GpuBuffer> gpu_holder_;
#endif // !MEDIAPIPE_DISABLE_GPU
};
Runner runner_;
};
template <typename InterfaceT>
class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
public:
#if !MEDIAPIPE_DISABLE_GPU
static absl::Status UpdateContract(CalculatorContract* cc) {
if constexpr (std::is_same_v<InterfaceT, WarpAffineCalculatorGpu> ||
std::is_same_v<InterfaceT, WarpAffineCalculator>) {
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
}
return absl::OkStatus();
}
#endif // !MEDIAPIPE_DISABLE_GPU
absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); }
absl::Status Process(CalculatorContext* cc) override {
if (InterfaceT::kInImage(cc).IsEmpty() ||
InterfaceT::kMatrix(cc).IsEmpty() ||
InterfaceT::kOutputSize(cc).IsEmpty()) {
return absl::OkStatus();
}
const std::array<float, 16>& transform = *InterfaceT::kMatrix(cc);
auto [out_width, out_height] = *InterfaceT::kOutputSize(cc);
AffineTransformation::Size output_size;
output_size.width = out_width;
output_size.height = out_height;
ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
ASSIGN_OR_RETURN(
auto result,
runner->Run(
*InterfaceT::kInImage(cc), transform, output_size,
GetBorderMode(cc->Options<mediapipe::WarpAffineCalculatorOptions>()
.border_mode())));
InterfaceT::kOutImage(cc).Send(std::move(result));
return absl::OkStatus();
}
private:
WarpAffineRunnerHolder<typename decltype(InterfaceT::kInImage)::PayloadT>
holder_;
};
} // namespace
MEDIAPIPE_NODE_IMPLEMENTATION(
WarpAffineCalculatorImpl<WarpAffineCalculatorCpu>);
#if !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(
WarpAffineCalculatorImpl<WarpAffineCalculatorGpu>);
#endif // !MEDIAPIPE_DISABLE_GPU
MEDIAPIPE_NODE_IMPLEMENTATION(WarpAffineCalculatorImpl<WarpAffineCalculator>);
} // namespace mediapipe


@ -0,0 +1,94 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
// Runs affine transformation.
//
// Input:
// IMAGE - Image/ImageFrame/GpuBuffer
//
// MATRIX - std::array<float, 16>
// Used as following:
// output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3],
// matrix[4] * x + matrix[5] * y + matrix[7])
// where x and y ranges are defined by @OUTPUT_SIZE.
//
// OUTPUT_SIZE - std::pair<int, int>
// Size of the output image.
//
// Output:
// IMAGE - Image/ImageFrame/GpuBuffer
//
// Note:
// - Output image type and format are the same as the input one.
//
// Usage example:
// node {
// calculator: "WarpAffineCalculator(Cpu|Gpu)"
// input_stream: "IMAGE:image"
// input_stream: "MATRIX:matrix"
// input_stream: "OUTPUT_SIZE:size"
// output_stream: "IMAGE:transformed_image"
// options: {
// [mediapipe.WarpAffineCalculatorOptions.ext] {
// border_mode: BORDER_ZERO
// }
// }
// }
template <typename ImageT>
class WarpAffineCalculatorIntf : public mediapipe::api2::NodeIntf {
public:
static constexpr mediapipe::api2::Input<ImageT> kInImage{"IMAGE"};
static constexpr mediapipe::api2::Input<std::array<float, 16>> kMatrix{
"MATRIX"};
static constexpr mediapipe::api2::Input<std::pair<int, int>> kOutputSize{
"OUTPUT_SIZE"};
static constexpr mediapipe::api2::Output<ImageT> kOutImage{"IMAGE"};
};
class WarpAffineCalculatorCpu : public WarpAffineCalculatorIntf<ImageFrame> {
public:
MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorCpu, kInImage, kMatrix,
kOutputSize, kOutImage);
};
#if !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculatorGpu
: public WarpAffineCalculatorIntf<mediapipe::GpuBuffer> {
public:
MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorGpu, kInImage, kMatrix,
kOutputSize, kOutImage);
};
#endif // !MEDIAPIPE_DISABLE_GPU
class WarpAffineCalculator : public WarpAffineCalculatorIntf<mediapipe::Image> {
public:
MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculator, kInImage, kMatrix, kOutputSize,
kOutImage);
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_


@ -0,0 +1,46 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";
message WarpAffineCalculatorOptions {
extend CalculatorOptions {
optional WarpAffineCalculatorOptions ext = 373693895;
}
// Pixel extrapolation methods. See @border_mode.
enum BorderMode {
BORDER_UNSPECIFIED = 0;
BORDER_ZERO = 1;
BORDER_REPLICATE = 2;
}
// Pixel extrapolation method.
// When converting image to tensor it may happen that tensor needs to read
// pixels outside image boundaries. Border mode helps to specify how such
// pixels will be calculated.
//
// BORDER_REPLICATE is used by default.
optional BorderMode border_mode = 1;
// For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
// to be flipped vertically as tensors are expected to start at top.
// (DEFAULT or unset interpreted as CONVENTIONAL.)
optional GpuOrigin.Mode gpu_origin = 2;
}


@ -0,0 +1,615 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <vector>
#include "absl/flags/flag.h"
#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/calculators/image/affine_transformation.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
cv::Mat GetRgb(absl::string_view path) {
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
cv::Mat rgb(bgr.rows, bgr.cols, CV_8UC3);
int from_to[] = {0, 2, 1, 1, 2, 0};
cv::mixChannels(&bgr, 1, &rgb, 1, from_to, 3);
return rgb;
}
cv::Mat GetRgba(absl::string_view path) {
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
cv::Mat rgba(bgr.rows, bgr.cols, CV_8UC4, cv::Scalar(0, 0, 0, 0));
int from_to[] = {0, 2, 1, 1, 2, 0};
cv::mixChannels(&bgr, 1, &bgr, 1, from_to, 3);
return bgr;
}
// Test template.
// No processing/assertions should be done after the function is invoked.
void RunTest(const std::string& graph_text, const std::string& tag,
const cv::Mat& input, cv::Mat expected_result,
float similarity_threshold, std::array<float, 16> matrix,
int out_width, int out_height,
absl::optional<AffineTransformation::BorderMode> border_mode) {
std::string border_mode_str;
if (border_mode) {
switch (*border_mode) {
case AffineTransformation::BorderMode::kReplicate:
border_mode_str = "border_mode: BORDER_REPLICATE";
break;
case AffineTransformation::BorderMode::kZero:
border_mode_str = "border_mode: BORDER_ZERO";
break;
}
}
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(graph_text, /*$0=*/border_mode_str));
std::vector<Packet> output_packets;
tool::AddVectorSink("output_image", &graph_config, &output_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
ImageFrame input_image(
input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
input.cols, input.rows, input.step, input.data, [](uint8*) {});
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image",
MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"matrix",
MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"output_size", MakePacket<std::pair<int, int>>(
std::pair<int, int>(out_width, out_height))
.At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_THAT(output_packets, testing::SizeIs(1));
// Get and process results.
const ImageFrame& out_frame = output_packets[0].Get<ImageFrame>();
cv::Mat result = formats::MatView(&out_frame);
double similarity =
1.0 - cv::norm(result, expected_result, cv::NORM_RELATIVE | cv::NORM_L2);
EXPECT_GE(similarity, similarity_threshold);
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
MP_ASSERT_OK(graph.CloseInputStream("matrix"));
MP_ASSERT_OK(graph.CloseInputStream("output_size"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
enum class InputType { kImageFrame, kImage };
// Similarity is always checked against OpenCV results, and due to differences
// in how OpenCV and GL treat pixels there are two thresholds.
// TODO: update to have just one threshold when OpenCV
// implementation is updated.
struct SimilarityConfig {
double threshold_on_cpu;
double threshold_on_gpu;
};
void RunTest(cv::Mat input, cv::Mat expected_result,
const SimilarityConfig& similarity, std::array<float, 16> matrix,
int out_width, int out_height,
absl::optional<AffineTransformation::BorderMode> border_mode) {
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "WarpAffineCalculatorCpu"
input_stream: "IMAGE:input_image"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
}
}
}
)",
"cpu", input, expected_result, similarity.threshold_on_cpu, matrix,
out_width, out_height, border_mode);
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:input_image"
output_stream: "IMAGE:input_image_unified"
}
node {
calculator: "WarpAffineCalculator"
input_stream: "IMAGE:input_image_unified"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image_unified"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
}
}
}
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:output_image_unified"
output_stream: "IMAGE_CPU:output_image"
}
)",
"cpu_image", input, expected_result, similarity.threshold_on_cpu,
matrix, out_width, out_height, border_mode);
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "input_image"
output_stream: "input_image_gpu"
}
node {
calculator: "WarpAffineCalculatorGpu"
input_stream: "IMAGE:input_image_gpu"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image_gpu"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
gpu_origin: TOP_LEFT
}
}
}
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "output_image_gpu"
output_stream: "output_image"
}
)",
"gpu", input, expected_result, similarity.threshold_on_gpu, matrix,
out_width, out_height, border_mode);
RunTest(R"(
input_stream: "input_image"
input_stream: "output_size"
input_stream: "matrix"
node {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "input_image"
output_stream: "input_image_gpu"
}
node {
calculator: "ToImageCalculator"
input_stream: "IMAGE_GPU:input_image_gpu"
output_stream: "IMAGE:input_image_unified"
}
node {
calculator: "WarpAffineCalculator"
input_stream: "IMAGE:input_image_unified"
input_stream: "MATRIX:matrix"
input_stream: "OUTPUT_SIZE:output_size"
output_stream: "IMAGE:output_image_unified"
options {
[mediapipe.WarpAffineCalculatorOptions.ext] {
$0 # border mode
gpu_origin: TOP_LEFT
}
}
}
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:output_image_unified"
output_stream: "IMAGE_GPU:output_image_gpu"
}
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "output_image_gpu"
output_stream: "output_image"
}
)",
"gpu_image", input, expected_result, similarity.threshold_on_gpu,
matrix, out_width, out_height, border_mode);
}
std::array<float, 16> GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi,
bool keep_aspect_ratio, int out_width,
int out_height) {
std::array<float, 16> transform_mat;
mediapipe::RotatedRect roi_absolute =
mediapipe::GetRoi(input.cols, input.rows, roi);
mediapipe::PadRoi(out_width, out_height, keep_aspect_ratio, &roi_absolute)
.IgnoreError();
mediapipe::GetRotatedSubRectToRectTransformMatrix(
roi_absolute, input.cols, input.rows,
/*flip_horizontaly=*/false, &transform_mat);
return transform_mat;
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode = {};
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.82},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_border_zero.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * 90.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.77},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotationBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * 90.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.75},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * -45.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, MediumSubRectWithRotationBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * -45.0f / 180.0f);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_with_rotation_border_zero.png");
int out_width = 256;
int out_height = 256;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.80},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.95},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = false;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.92},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
auto input = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_border_zero.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(M_PI * -15.0f / 180.0f);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode = {};
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.91},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotationBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(M_PI * -15.0f / 180.0f);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation_border_zero.png");
int out_width = 128;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.88},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, NoOp) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.0f);
roi.set_height(1.0f);
roi.set_rotation(0);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png");
int out_width = 64;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kReplicate;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
TEST(WarpAffineCalculatorTest, NoOpBorderZero) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.0f);
roi.set_height(1.0f);
roi.set_rotation(0);
auto input = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg");
auto expected_output = GetRgba(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png");
int out_width = 64;
int out_height = 128;
bool keep_aspect_ratio = true;
std::optional<AffineTransformation::BorderMode> border_mode =
AffineTransformation::BorderMode::kZero;
RunTest(input, expected_output,
{.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99},
GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height),
out_width, out_height, border_mode);
}
} // namespace
} // namespace mediapipe

View File

@ -26,6 +26,11 @@ licenses(["notice"])
package(default_visibility = ["//visibility:private"]) package(default_visibility = ["//visibility:private"])
exports_files(
glob(["testdata/image_to_tensor/*"]),
visibility = ["//mediapipe/calculators/image:__subpackages__"],
)
selects.config_setting_group( selects.config_setting_group(
name = "compute_shader_unavailable", name = "compute_shader_unavailable",
match_any = [ match_any = [

View File

@ -87,9 +87,9 @@ using GpuBuffer = mediapipe::GpuBuffer;
// TENSORS - std::vector<Tensor> // TENSORS - std::vector<Tensor>
// Vector containing a single Tensor populated with an extracted RGB image. // Vector containing a single Tensor populated with an extracted RGB image.
// MATRIX - std::array<float, 16> @Optional // MATRIX - std::array<float, 16> @Optional
// An std::array<float, 16> representing a 4x4 row-major-order matrix which // An std::array<float, 16> representing a 4x4 row-major-order matrix that
// can be used to map a point on the output tensor to a point on the input // maps a point on the input image to a point on the output tensor, and
// image. // can be used to reverse the mapping by inverting the matrix.
// LETTERBOX_PADDING - std::array<float, 4> @Optional // LETTERBOX_PADDING - std::array<float, 4> @Optional
// An std::array<float, 4> representing the letterbox padding from the 4 // An std::array<float, 4> representing the letterbox padding from the 4
// sides ([left, top, right, bottom]) of the output image, normalized to // sides ([left, top, right, bottom]) of the output image, normalized to
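To make the updated MATRIX contract concrete, a minimal sketch (not part of this commit; it assumes the usual column-vector convention for the row-major 4x4 matrix) of mapping a 2D point through the matrix; inverting the matrix reverses the mapping, as the new comment states:

#include <array>

// Maps (x, y) through the row-major 4x4 matrix m, treating the point as the
// homogeneous vector (x, y, 0, 1) and dividing by the resulting w.
std::array<float, 2> MapPoint(const std::array<float, 16>& m, float x, float y) {
  const float out_x = m[0] * x + m[1] * y + m[3];
  const float out_y = m[4] * x + m[5] * y + m[7];
  const float w = m[12] * x + m[13] * y + m[15];
  return {out_x / w, out_y / w};
}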

View File

@ -517,8 +517,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
uniform sampler2D frame; uniform sampler2D frame;
void main() { void main() {
$1 // flip vec2 coord = $1
vec4 pixel = texture2D(frame, sample_coordinate); vec4 pixel = texture2D(frame, coord);
$2 // normalize [-1,1] $2 // normalize [-1,1]
fragColor.r = pixel.r; // r channel fragColor.r = pixel.r; // r channel
$3 // g & b channels $3 // g & b channels
@ -526,8 +526,9 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
})", })",
/*$0=*/single_channel ? "vec1" : "vec4", /*$0=*/single_channel ? "vec1" : "vec4",
/*$1=*/ /*$1=*/
flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;" flip_vertically_
: "", ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
: "sample_coordinate;",
/*$2=*/output_range_.has_value() /*$2=*/output_range_.has_value()
? absl::Substitute("pixel = pixel * float($0) + float($1);", ? absl::Substitute("pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first), (output_range_->second - output_range_->first),
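Since the flattened diff is hard to read here, a minimal sketch (illustration only, not the calculator's actual helper) of what the $1 substitution now expands to in the generated shader:

#include <string>

#include "absl/strings/substitute.h"

// Returns the single shader line that differs between the two cases.
std::string BuildCoordLine(bool flip_vertically) {
  return absl::Substitute(
      "vec2 coord = $0",
      flip_vertically
          ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
          : "sample_coordinate;");
}
// BuildCoordLine(true)  -> "vec2 coord = vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
// BuildCoordLine(false) -> "vec2 coord = sample_coordinate;"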

View File

@ -587,9 +587,21 @@ cc_library(
"//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:ret_check",
] + select({ ] + select({
"//conditions:default": [ "//conditions:default": [
"//mediapipe/framework/port:file_helpers",
], ],
}), "//mediapipe:android": [],
}) + select(
{
"//conditions:default": [
],
},
) + select(
{
"//conditions:default": [
],
"//mediapipe:android": [
],
},
),
alwayslink = 1, alwayslink = 1,
) )

View File

@ -37,6 +37,7 @@ const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE";
const char kImageTag[] = "IMAGE"; const char kImageTag[] = "IMAGE";
const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_"; const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_";
const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_"; const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_";
const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";
const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED"; const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
const char kBBoxTag[] = "BBOX"; const char kBBoxTag[] = "BBOX";
const char kKeypointsTag[] = "KEYPOINTS"; const char kKeypointsTag[] = "KEYPOINTS";
@ -153,6 +154,9 @@ class PackMediaSequenceCalculator : public CalculatorBase {
if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) { if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) {
cc->Inputs().Tag(tag).Set<std::vector<float>>(); cc->Inputs().Tag(tag).Set<std::vector<float>>();
} }
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
cc->Inputs().Tag(tag).Set<std::vector<std::string>>();
}
} }
CHECK(cc->Outputs().HasTag(kSequenceExampleTag) || CHECK(cc->Outputs().HasTag(kSequenceExampleTag) ||
@ -231,6 +235,13 @@ class PackMediaSequenceCalculator : public CalculatorBase {
mpms::ClearFeatureFloats(key, sequence_.get()); mpms::ClearFeatureFloats(key, sequence_.get());
mpms::ClearFeatureTimestamp(key, sequence_.get()); mpms::ClearFeatureTimestamp(key, sequence_.get());
} }
if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) {
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
sizeof(*kBytesFeaturePrefixTag) -
1);
mpms::ClearFeatureBytes(key, sequence_.get());
mpms::ClearFeatureTimestamp(key, sequence_.get());
}
if (absl::StartsWith(tag, kKeypointsTag)) { if (absl::StartsWith(tag, kKeypointsTag)) {
std::string key = std::string key =
tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1); tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1);
@ -405,6 +416,17 @@ class PackMediaSequenceCalculator : public CalculatorBase {
cc->Inputs().Tag(tag).Get<std::vector<float>>(), cc->Inputs().Tag(tag).Get<std::vector<float>>(),
sequence_.get()); sequence_.get());
} }
if (absl::StartsWith(tag, kBytesFeaturePrefixTag) &&
!cc->Inputs().Tag(tag).IsEmpty()) {
std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) /
sizeof(*kBytesFeaturePrefixTag) -
1);
mpms::AddFeatureTimestamp(key, cc->InputTimestamp().Value(),
sequence_.get());
mpms::AddFeatureBytes(
key, cc->Inputs().Tag(tag).Get<std::vector<std::string>>(),
sequence_.get());
}
if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) { if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) {
std::string key = ""; std::string key = "";
if (tag != kBBoxTag) { if (tag != kBBoxTag) {
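A minimal sketch (not part of this commit; the CAPTION key, stream names, and helper name are made up) of a node that uses the new bytes-feature support -- each BYTES_FEATURE_<KEY> input stream carries std::vector<std::string> packets that are stored under <KEY> in the output SequenceExample:

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig::Node MakePackMediaSequenceNode() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(R"pb(
    calculator: "PackMediaSequenceCalculator"
    input_side_packet: "SEQUENCE_EXAMPLE:input_sequence"
    input_stream: "BYTES_FEATURE_CAPTION:caption_strings"
    output_stream: "SEQUENCE_EXAMPLE:output_sequence"
  )pb");
}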

View File

@ -49,6 +49,8 @@ constexpr char kKeypointsTestTag[] = "KEYPOINTS_TEST";
constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED"; constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED";
constexpr char kAudioOtherTag[] = "AUDIO_OTHER"; constexpr char kAudioOtherTag[] = "AUDIO_OTHER";
constexpr char kAudioTestTag[] = "AUDIO_TEST"; constexpr char kAudioTestTag[] = "AUDIO_TEST";
constexpr char kBytesFeatureOtherTag[] = "BYTES_FEATURE_OTHER";
constexpr char kBytesFeatureTestTag[] = "BYTES_FEATURE_TEST";
constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED"; constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED";
constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER"; constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER";
constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST"; constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST";
@ -215,6 +217,54 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) {
} }
} }
TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBytesLists) {
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
false, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
int num_timesteps = 2;
for (int i = 0; i < num_timesteps; ++i) {
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("foo", 2 << i));
runner_->MutableInputs()
->Tag(kBytesFeatureTestTag)
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("bar", 2 << i));
runner_->MutableInputs()
->Tag(kBytesFeatureOtherTag)
.packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i)));
}
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets =
runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>();
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("TEST", output_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", output_sequence));
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("OTHER", output_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", output_sequence));
for (int i = 0; i < num_timesteps; ++i) {
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("TEST", output_sequence, i));
ASSERT_THAT(mpms::GetFeatureBytesAt("TEST", output_sequence, i),
::testing::ElementsAreArray(
std::vector<std::string>(2, absl::StrCat("foo", 2 << i))));
ASSERT_EQ(i, mpms::GetFeatureTimestampAt("OTHER", output_sequence, i));
ASSERT_THAT(mpms::GetFeatureBytesAt("OTHER", output_sequence, i),
::testing::ElementsAreArray(
std::vector<std::string>(2, absl::StrCat("bar", 2 << i))));
}
}
TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) { TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) {
SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true); SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>(); auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
@ -829,6 +879,45 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFloatVectors) {
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence)); ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
} }
TEST_F(PackMediaSequenceCalculatorTest, TestReplacingBytesVectors) {
SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {},
false, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
int num_timesteps = 2;
for (int i = 0; i < num_timesteps; ++i) {
auto vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("foo", 2 << i));
mpms::AddFeatureBytes("TEST", *vs_ptr, input_sequence.get());
mpms::AddFeatureTimestamp("TEST", i, input_sequence.get());
vs_ptr = ::absl::make_unique<std::vector<std::string>>(
2, absl::StrCat("bar", 2 << i));
mpms::AddFeatureBytes("OTHER", *vs_ptr, input_sequence.get());
mpms::AddFeatureTimestamp("OTHER", i, input_sequence.get());
}
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("TEST", *input_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", *input_sequence));
ASSERT_EQ(num_timesteps,
mpms::GetFeatureTimestampSize("OTHER", *input_sequence));
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", *input_sequence));
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release());
MP_ASSERT_OK(runner_->Run());
const std::vector<Packet>& output_packets =
runner_->Outputs().Tag(kSequenceExampleTag).packets;
ASSERT_EQ(1, output_packets.size());
const tf::SequenceExample& output_sequence =
output_packets[0].Get<tf::SequenceExample>();
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("TEST", output_sequence));
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("TEST", output_sequence));
ASSERT_EQ(0, mpms::GetFeatureTimestampSize("OTHER", output_sequence));
ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence));
}
TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) { TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) {
SetUpCalculator({"IMAGE:images"}, {}, false, true); SetUpCalculator({"IMAGE:images"}, {}, false, true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>(); auto input_sequence = ::absl::make_unique<tf::SequenceExample>();

View File

@ -162,6 +162,27 @@ selects.config_setting_group(
], ],
) )
config_setting(
name = "edge_tpu_usb",
define_values = {
"MEDIAPIPE_EDGE_TPU": "usb",
},
)
config_setting(
name = "edge_tpu_pci",
define_values = {
"MEDIAPIPE_EDGE_TPU": "pci",
},
)
config_setting(
name = "edge_tpu_all",
define_values = {
"MEDIAPIPE_EDGE_TPU": "all",
},
)
cc_library( cc_library(
name = "tflite_inference_calculator", name = "tflite_inference_calculator",
srcs = ["tflite_inference_calculator.cc"], srcs = ["tflite_inference_calculator.cc"],
@ -172,6 +193,12 @@ cc_library(
], ],
"//conditions:default": [], "//conditions:default": [],
}), }),
defines = select({
"//conditions:default": [],
":edge_tpu_usb": ["MEDIAPIPE_EDGE_TPU=usb"],
":edge_tpu_pci": ["MEDIAPIPE_EDGE_TPU=pci"],
":edge_tpu_all": ["MEDIAPIPE_EDGE_TPU=all"],
}),
linkopts = select({ linkopts = select({
"//mediapipe:ios": [ "//mediapipe:ios": [
"-framework CoreVideo", "-framework CoreVideo",
@ -223,6 +250,20 @@ cc_library(
"//conditions:default": [ "//conditions:default": [
"//mediapipe/util:cpu_util", "//mediapipe/util:cpu_util",
], ],
}) + select({
"//conditions:default": [],
":edge_tpu_usb": [
"@libedgetpu//tflite/public:edgetpu",
"@libedgetpu//tflite/public:oss_edgetpu_direct_usb",
],
":edge_tpu_pci": [
"@libedgetpu//tflite/public:edgetpu",
"@libedgetpu//tflite/public:oss_edgetpu_direct_pci",
],
":edge_tpu_all": [
"@libedgetpu//tflite/public:edgetpu",
"@libedgetpu//tflite/public:oss_edgetpu_direct_all",
],
}), }),
alwayslink = 1, alwayslink = 1,
) )
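These settings follow standard Bazel define semantics: passing --define MEDIAPIPE_EDGE_TPU=usb (or pci, or all) on the build command line selects the matching config_setting, which both defines the MEDIAPIPE_EDGE_TPU macro checked in tflite_inference_calculator.cc and pulls in the corresponding @libedgetpu runtime dependency for the chosen transport. The flag spelling is inferred from define_values and is not spelled out elsewhere in this commit.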

View File

@ -85,7 +85,22 @@ constexpr char kTensorsGpuTag[] = "TENSORS_GPU";
} // namespace } // namespace
#if defined(MEDIAPIPE_EDGE_TPU) #if defined(MEDIAPIPE_EDGE_TPU)
#include "edgetpu.h" #include "tflite/public/edgetpu.h"
// Checks whether the model contains an Edge TPU custom op.
bool ContainsEdgeTpuCustomOp(const tflite::FlatBufferModel& model) {
const auto* opcodes = model.GetModel()->operator_codes();
for (const auto* subgraph : *model.GetModel()->subgraphs()) {
for (const auto* op : *subgraph->operators()) {
const auto* opcode = opcodes->Get(op->opcode_index());
if (opcode->custom_code() &&
opcode->custom_code()->str() == edgetpu::kCustomOp) {
return true;
}
}
}
return false;
}
// Creates and returns an Edge TPU interpreter to run the given edgetpu model. // Creates and returns an Edge TPU interpreter to run the given edgetpu model.
std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter( std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
@ -94,14 +109,9 @@ std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
edgetpu::EdgeTpuContext* edgetpu_context) { edgetpu::EdgeTpuContext* edgetpu_context) {
resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp()); resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
std::unique_ptr<tflite::Interpreter> interpreter; std::unique_ptr<tflite::Interpreter> interpreter;
if (tflite::InterpreterBuilder(model, *resolver)(&interpreter) != kTfLiteOk) { CHECK_EQ(tflite::InterpreterBuilder(model, *resolver)(&interpreter),
std::cerr << "Failed to build edge TPU interpreter." << std::endl; kTfLiteOk);
}
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context); interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
interpreter->SetNumThreads(1);
if (interpreter->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate edge TPU tensors." << std::endl;
}
return interpreter; return interpreter;
} }
#endif // MEDIAPIPE_EDGE_TPU #endif // MEDIAPIPE_EDGE_TPU
@ -279,8 +289,7 @@ class TfLiteInferenceCalculator : public CalculatorBase {
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE #endif // MEDIAPIPE_TFLITE_GL_INFERENCE
#if defined(MEDIAPIPE_EDGE_TPU) #if defined(MEDIAPIPE_EDGE_TPU)
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_ = std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_;
edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
#endif #endif
bool gpu_inference_ = false; bool gpu_inference_ = false;
@ -303,6 +312,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator);
// Calculator Core Section // Calculator Core Section
namespace { namespace {
constexpr char kCustomOpResolverTag[] = "CUSTOM_OP_RESOLVER";
constexpr char kModelTag[] = "MODEL";
template <class CC> template <class CC>
bool ShouldUseGpu(CC* cc) { bool ShouldUseGpu(CC* cc) {
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED #if MEDIAPIPE_TFLITE_GPU_SUPPORTED
@ -327,7 +340,7 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
const auto& options = const auto& options =
cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>();
RET_CHECK(!options.model_path().empty() ^ RET_CHECK(!options.model_path().empty() ^
cc->InputSidePackets().HasTag("MODEL")) cc->InputSidePackets().HasTag(kModelTag))
<< "Either model as side packet or model path in options is required."; << "Either model as side packet or model path in options is required.";
if (cc->Inputs().HasTag(kTensorsTag)) if (cc->Inputs().HasTag(kTensorsTag))
@ -340,13 +353,13 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
if (cc->Outputs().HasTag(kTensorsGpuTag)) if (cc->Outputs().HasTag(kTensorsGpuTag))
cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>(); cc->Outputs().Tag(kTensorsGpuTag).Set<std::vector<GpuTensor>>();
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
cc->InputSidePackets() cc->InputSidePackets()
.Tag("CUSTOM_OP_RESOLVER") .Tag(kCustomOpResolverTag)
.Set<tflite::ops::builtin::BuiltinOpResolver>(); .Set<tflite::ops::builtin::BuiltinOpResolver>();
} }
if (cc->InputSidePackets().HasTag("MODEL")) { if (cc->InputSidePackets().HasTag(kModelTag)) {
cc->InputSidePackets().Tag("MODEL").Set<TfLiteModelPtr>(); cc->InputSidePackets().Tag(kModelTag).Set<TfLiteModelPtr>();
} }
if (ShouldUseGpu(cc)) { if (ShouldUseGpu(cc)) {
@ -486,8 +499,8 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(WriteKernelsToFile()); MP_RETURN_IF_ERROR(WriteKernelsToFile());
return RunInContextIfNeeded([this]() -> absl::Status { return RunInContextIfNeeded([this]() -> absl::Status {
interpreter_ = nullptr;
if (delegate_) { if (delegate_) {
interpreter_ = nullptr;
delegate_ = nullptr; delegate_ = nullptr;
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED #if MEDIAPIPE_TFLITE_GPU_SUPPORTED
if (gpu_inference_) { if (gpu_inference_) {
@ -501,7 +514,7 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) {
#endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED #endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED
} }
#if defined(MEDIAPIPE_EDGE_TPU) #if defined(MEDIAPIPE_EDGE_TPU)
edgetpu_context_.reset(); edgetpu_context_ = nullptr;
#endif #endif
return absl::OkStatus(); return absl::OkStatus();
}); });
@ -723,9 +736,9 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
auto op_resolver_ptr = auto op_resolver_ptr =
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>( static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
&default_op_resolver); &default_op_resolver);
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
op_resolver_ptr = &(cc->InputSidePackets() op_resolver_ptr = &(cc->InputSidePackets()
.Tag("CUSTOM_OP_RESOLVER") .Tag(kCustomOpResolverTag)
.Get<tflite::ops::builtin::BuiltinOpResolver>()); .Get<tflite::ops::builtin::BuiltinOpResolver>());
} }
@ -825,21 +838,26 @@ absl::Status TfLiteInferenceCalculator::LoadModel(CalculatorContext* cc) {
tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates
default_op_resolver; default_op_resolver;
auto op_resolver_ptr =
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
&default_op_resolver);
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
op_resolver_ptr = &(cc->InputSidePackets()
.Tag("CUSTOM_OP_RESOLVER")
.Get<tflite::ops::builtin::BuiltinOpResolver>());
}
#if defined(MEDIAPIPE_EDGE_TPU) #if defined(MEDIAPIPE_EDGE_TPU)
interpreter_ = if (ContainsEdgeTpuCustomOp(model)) {
BuildEdgeTpuInterpreter(model, op_resolver_ptr, edgetpu_context_.get()); edgetpu_context_ = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
#else interpreter_ = BuildEdgeTpuInterpreter(model, &default_op_resolver,
tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_); edgetpu_context_.get());
} else {
#endif // MEDIAPIPE_EDGE_TPU
auto op_resolver_ptr =
static_cast<const tflite::ops::builtin::BuiltinOpResolver*>(
&default_op_resolver);
if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) {
op_resolver_ptr = &(cc->InputSidePackets()
.Tag(kCustomOpResolverTag)
.Get<tflite::ops::builtin::BuiltinOpResolver>());
}
tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_);
#if defined(MEDIAPIPE_EDGE_TPU)
}
#endif // MEDIAPIPE_EDGE_TPU #endif // MEDIAPIPE_EDGE_TPU
RET_CHECK(interpreter_); RET_CHECK(interpreter_);
@ -872,8 +890,8 @@ absl::StatusOr<Packet> TfLiteInferenceCalculator::GetModelAsPacket(
if (!options.model_path().empty()) { if (!options.model_path().empty()) {
return TfLiteModelLoader::LoadFromPath(options.model_path()); return TfLiteModelLoader::LoadFromPath(options.model_path());
} }
if (cc.InputSidePackets().HasTag("MODEL")) { if (cc.InputSidePackets().HasTag(kModelTag)) {
return cc.InputSidePackets().Tag("MODEL"); return cc.InputSidePackets().Tag(kModelTag);
} }
return absl::Status(absl::StatusCode::kNotFound, return absl::Status(absl::StatusCode::kNotFound,
"Must specify TFLite model as path or loaded model."); "Must specify TFLite model as path or loaded model.");
@ -929,6 +947,8 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
kTfLiteOk); kTfLiteOk);
return absl::OkStatus(); return absl::OkStatus();
} }
#else
(void)use_xnnpack;
#endif // !EDGETPU #endif // !EDGETPU
// Return and use default tflite inference (on CPU). No need for GPU // Return and use default tflite inference (on CPU). No need for GPU
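A minimal sketch (illustration only; the model path is a placeholder and the helper name is ours) of a TfLiteInferenceCalculator node configured through options. With MEDIAPIPE_EDGE_TPU defined, the calculator now opens the Edge TPU device only when ContainsEdgeTpuCustomOp() finds the edgetpu custom op in the loaded model; otherwise it builds the regular interpreter:

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig::Node MakeTfLiteInferenceNode() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(R"pb(
    calculator: "TfLiteInferenceCalculator"
    input_stream: "TENSORS:input_tensors"
    output_stream: "TENSORS:output_tensors"
    options {
      [mediapipe.TfLiteInferenceCalculatorOptions.ext] { model_path: "path/to/model.tflite" }
    }
  )pb");
}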

View File

@ -1353,3 +1353,34 @@ cc_test(
"//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:gtest_main",
], ],
) )
cc_library(
name = "inverse_matrix_calculator",
srcs = ["inverse_matrix_calculator.cc"],
hdrs = ["inverse_matrix_calculator.h"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"@com_google_absl//absl/status",
"@eigen_archive//:eigen3",
],
alwayslink = True,
)
cc_test(
name = "inverse_matrix_calculator_test",
srcs = ["inverse_matrix_calculator_test.cc"],
tags = ["desktop_only_test"],
deps = [
":inverse_matrix_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:parse_text_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
)

View File

@ -33,6 +33,7 @@ namespace {
constexpr char kImageFrameTag[] = "IMAGE_CPU"; constexpr char kImageFrameTag[] = "IMAGE_CPU";
constexpr char kGpuBufferTag[] = "IMAGE_GPU"; constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kImageTag[] = "IMAGE"; constexpr char kImageTag[] = "IMAGE";
constexpr char kSourceOnGpuTag[] = "SOURCE_ON_GPU";
} // namespace } // namespace
// A calculator for converting the unified image container into // A calculator for converting the unified image container into
@ -46,6 +47,8 @@ constexpr char kImageTag[] = "IMAGE";
// IMAGE_CPU: An ImageFrame containing output image. // IMAGE_CPU: An ImageFrame containing output image.
// IMAGE_GPU: A GpuBuffer containing output image. // IMAGE_GPU: A GpuBuffer containing output image.
// //
// SOURCE_ON_GPU: A bool indicating whether the source Image is stored on GPU.
//
// Note: // Note:
// Data is automatically transferred to/from the CPU or GPU // Data is automatically transferred to/from the CPU or GPU
// depending on output type. // depending on output type.
@ -66,6 +69,7 @@ class FromImageCalculator : public CalculatorBase {
absl::Status RenderGpu(CalculatorContext* cc); absl::Status RenderGpu(CalculatorContext* cc);
absl::Status RenderCpu(CalculatorContext* cc); absl::Status RenderCpu(CalculatorContext* cc);
bool check_image_source_ = false;
bool gpu_output_ = false; bool gpu_output_ = false;
bool gpu_initialized_ = false; bool gpu_initialized_ = false;
#if !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU
@ -102,6 +106,9 @@ absl::Status FromImageCalculator::GetContract(CalculatorContract* cc) {
#endif // !MEDIAPIPE_DISABLE_GPU #endif // !MEDIAPIPE_DISABLE_GPU
} }
if (cc->Outputs().HasTag(kSourceOnGpuTag)) {
cc->Outputs().Tag(kSourceOnGpuTag).Set<bool>();
}
return absl::OkStatus(); return absl::OkStatus();
} }
@ -111,7 +118,9 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) {
if (cc->Outputs().HasTag(kGpuBufferTag)) { if (cc->Outputs().HasTag(kGpuBufferTag)) {
gpu_output_ = true; gpu_output_ = true;
} }
if (cc->Outputs().HasTag(kSourceOnGpuTag)) {
check_image_source_ = true;
}
if (gpu_output_) { if (gpu_output_) {
#if !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
@ -122,6 +131,13 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) {
} }
absl::Status FromImageCalculator::Process(CalculatorContext* cc) { absl::Status FromImageCalculator::Process(CalculatorContext* cc) {
if (check_image_source_) {
auto& input = cc->Inputs().Tag(kImageTag).Get<mediapipe::Image>();
cc->Outputs()
.Tag(kSourceOnGpuTag)
.AddPacket(MakePacket<bool>(input.UsesGpu()).At(cc->InputTimestamp()));
}
if (gpu_output_) { if (gpu_output_) {
#if !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([&cc]() -> absl::Status { MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([&cc]() -> absl::Status {
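A minimal sketch (illustration only; stream names and the helper name are made up) of a FromImageCalculator node that uses the new output -- SOURCE_ON_GPU emits one bool packet per input Image reporting whether the Image is GPU-backed:

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig::Node MakeFromImageNode() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(R"pb(
    calculator: "FromImageCalculator"
    input_stream: "IMAGE:image"
    output_stream: "IMAGE_CPU:image_frame"
    output_stream: "SOURCE_ON_GPU:is_gpu_source"
  )pb");
}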

View File

@ -0,0 +1,50 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/util/inverse_matrix_calculator.h"
#include "Eigen/Core"
#include "Eigen/Geometry"
#include "Eigen/LU"
#include "absl/status/status.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
namespace mediapipe {
namespace api2 {
class InverseMatrixCalculatorImpl : public NodeImpl<InverseMatrixCalculator> {
absl::Status Process(mediapipe::CalculatorContext* cc) override {
if (kInputMatrix(cc).IsEmpty()) {
return absl::OkStatus();
}
Eigen::Matrix<float, 4, 4, Eigen::RowMajor> matrix(
kInputMatrix(cc).Get().data());
Eigen::Matrix<float, 4, 4, Eigen::RowMajor> inverse_matrix;
bool inverse_check;
matrix.computeInverseWithCheck(inverse_matrix, inverse_check);
RET_CHECK(inverse_check) << "Inverse matrix cannot be calculated.";
std::array<float, 16> output;
Eigen::Map<Eigen::Matrix<float, 4, 4, Eigen::RowMajor>>(
output.data(), 4, 4) = inverse_matrix.matrix();
kOutputMatrix(cc).Send(std::move(output));
return absl::OkStatus();
}
};
MEDIAPIPE_NODE_IMPLEMENTATION(InverseMatrixCalculatorImpl);
} // namespace api2
} // namespace mediapipe

View File

@ -0,0 +1,51 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
namespace mediapipe {
// Computes the inverse of a 4x4 matrix.
//
// Input:
// MATRIX - std::array<float, 16>
// Row-major 4x4 matrix to invert.
//
// Output:
// MATRIX - std::array<float, 16>
// Row-major 4x4 inverse matrix.
//
// Usage example:
// node {
// calculator: "dishti.aimatter.InverseMatrixCalculator"
// input_stream: "MATRIX:input_matrix"
// output_stream: "MATRIX:output_matrix"
// }
class InverseMatrixCalculator : public mediapipe::api2::NodeIntf {
public:
static constexpr mediapipe::api2::Input<std::array<float, 16>> kInputMatrix{
"MATRIX"};
static constexpr mediapipe::api2::Output<std::array<float, 16>> kOutputMatrix{
"MATRIX"};
MEDIAPIPE_NODE_INTERFACE(InverseMatrixCalculator, kInputMatrix,
kOutputMatrix);
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_

View File

@ -0,0 +1,126 @@
#include "mediapipe/calculators/util/inverse_matrix_calculator.h"
#include <array>
#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
void RunTest(const std::array<float, 16>& matrix,
const std::array<float, 16>& expected_inverse_matrix) {
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
input_stream: "matrix"
node {
calculator: "InverseMatrixCalculator"
input_stream: "MATRIX:matrix"
output_stream: "MATRIX:inverse_matrix"
}
)pb");
std::vector<Packet> output_packets;
tool::AddVectorSink("inverse_matrix", &graph_config, &output_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"matrix",
MakePacket<std::array<float, 16>>(std::move(matrix)).At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_THAT(output_packets, testing::SizeIs(1));
const auto& inverse_matrix = output_packets[0].Get<std::array<float, 16>>();
EXPECT_THAT(inverse_matrix, testing::Eq(expected_inverse_matrix));
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("matrix"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST(InverseMatrixCalculatorTest, Identity) {
// clang-format off
std::array<float, 16> matrix = {
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
std::array<float, 16> expected_inverse_matrix = {
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
// clang-format on
RunTest(matrix, expected_inverse_matrix);
}
TEST(InverseMatrixCalculatorTest, Translation) {
// clang-format off
std::array<float, 16> matrix = {
1.0f, 0.0f, 0.0f, 2.0f,
0.0f, 1.0f, 0.0f, -5.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
std::array<float, 16> expected_inverse_matrix = {
1.0f, 0.0f, 0.0f, -2.0f,
0.0f, 1.0f, 0.0f, 5.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
// clang-format on
RunTest(matrix, expected_inverse_matrix);
}
TEST(InverseMatrixCalculatorTest, Scale) {
// clang-format off
std::array<float, 16> matrix = {
5.0f, 0.0f, 0.0f, 0.0f,
0.0f, 2.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
std::array<float, 16> expected_inverse_matrix = {
0.2f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
// clang-format on
RunTest(matrix, expected_inverse_matrix);
}
TEST(InverseMatrixCalculatorTest, Rotation90) {
// clang-format off
std::array<float, 16> matrix = {
0.0f, -1.0f, 0.0f, 0.0f,
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
std::array<float, 16> expected_inverse_matrix = {
0.0f, 1.0f, 0.0f, 0.0f,
-1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 0.0f, 1.0f,
};
// clang-format on
RunTest(matrix, expected_inverse_matrix);
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,16 @@
@rem Remove the current res dir symlinks that are for Linux and macOS and recreate res dir symlinks for Windows.
@rem This script needs administrator permission and must be run as administrator.
@rem for hands example app.
cd /d %~dp0
cd hands\src\main
rm res
mklink /d res ..\..\..\res
@rem for facemesh example app.
cd /d %~dp0
cd facemesh\src\main
rm res
mklink /d res ..\..\..\res
dir
pause

View File

@ -0,0 +1,50 @@
plugins {
id 'com.android.application'
}
android {
compileSdkVersion 30
buildToolsVersion "30.0.3"
defaultConfig {
applicationId "com.google.mediapipe.apps.hands"
minSdkVersion 21
targetSdkVersion 30
versionCode 1
versionName "1.0"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
}
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar'])
implementation 'androidx.appcompat:appcompat:1.3.0'
implementation 'com.google.android.material:material:1.3.0'
implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
testImplementation 'junit:junit:4.+'
androidTestImplementation 'androidx.test.ext:junit:1.1.2'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
// MediaPipe FaceMesh solution API and solution-core.
implementation 'com.google.mediapipe:solution-core:latest.release'
implementation 'com.google.mediapipe:facemesh:latest.release'
// MediaPipe deps
implementation 'com.google.flogger:flogger:latest.release'
implementation 'com.google.flogger:flogger-system-backend:latest.release'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library
def camerax_version = "1.0.0-beta10"
implementation "androidx.camera:camera-core:$camerax_version"
implementation "androidx.camera:camera-camera2:$camerax_version"
implementation "androidx.camera:camera-lifecycle:$camerax_version"
}

View File

@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.examples.facemesh">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="30" />
<!-- For loading images from gallery -->
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<application
android:allowBackup="true"
android:icon="@mipmap/ic_launcher"
android:label="MediaPipe FaceMesh"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity android:name=".MainActivity"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -0,0 +1,44 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
android_binary(
name = "facemesh",
srcs = glob(["**/*.java"]),
custom_package = "com.google.mediapipe.examples.facemesh",
manifest = "AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.examples.facemesh",
},
multidex = "native",
resource_files = ["//mediapipe/examples/android/solutions:resource_files"],
deps = [
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
"//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
"//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
"//mediapipe/java/com/google/mediapipe/solutions/facemesh",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:opencv",
"@maven//:androidx_activity_activity",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_fragment_fragment",
"@maven//:com_google_guava_guava",
],
)

View File

@ -0,0 +1,186 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.examples.facemesh;
import android.opengl.GLES20;
import android.opengl.Matrix;
import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.ResultGlBoundary;
import com.google.mediapipe.solutioncore.ResultGlRenderer;
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.util.List;
/** A custom implementation of {@link ResultGlRenderer} to render MediaPipe FaceMesh results. */
public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult> {
private static final String TAG = "FaceMeshResultGlRenderer";
private static final float[] TESSELATION_COLOR = new float[] {0.75f, 0.75f, 0.75f, 0.5f};
private static final int TESSELATION_THICKNESS = 5;
private static final float[] RIGHT_EYE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
private static final int RIGHT_EYE_THICKNESS = 8;
private static final float[] RIGHT_EYEBROW_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
private static final int RIGHT_EYEBROW_THICKNESS = 8;
private static final float[] LEFT_EYE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
private static final int LEFT_EYE_THICKNESS = 8;
private static final float[] LEFT_EYEBROW_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
private static final int LEFT_EYEBROW_THICKNESS = 8;
private static final float[] FACE_OVAL_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
private static final int FACE_OVAL_THICKNESS = 8;
private static final float[] LIPS_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
private static final int LIPS_THICKNESS = 8;
private static final String VERTEX_SHADER =
"uniform mat4 uTransformMatrix;\n"
+ "attribute vec4 vPosition;\n"
+ "void main() {\n"
+ " gl_Position = uTransformMatrix * vPosition;\n"
+ "}";
private static final String FRAGMENT_SHADER =
"precision mediump float;\n"
+ "uniform vec4 uColor;\n"
+ "void main() {\n"
+ " gl_FragColor = uColor;\n"
+ "}";
private int program;
private int positionHandle;
private int transformMatrixHandle;
private int colorHandle;
private final float[] transformMatrix = new float[16];
private int loadShader(int type, String shaderCode) {
int shader = GLES20.glCreateShader(type);
GLES20.glShaderSource(shader, shaderCode);
GLES20.glCompileShader(shader);
return shader;
}
@Override
public void setupRendering() {
program = GLES20.glCreateProgram();
int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER);
int fragmentShader = loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER);
GLES20.glAttachShader(program, vertexShader);
GLES20.glAttachShader(program, fragmentShader);
GLES20.glLinkProgram(program);
positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
transformMatrixHandle = GLES20.glGetUniformLocation(program, "uTransformMatrix");
colorHandle = GLES20.glGetUniformLocation(program, "uColor");
}
@Override
public void renderResult(FaceMeshResult result, ResultGlBoundary boundary) {
if (result == null) {
return;
}
GLES20.glUseProgram(program);
// Sets the transform matrix to align the result rendering with the scaled output texture.
// Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
// bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
// top-left corner.
Matrix.setIdentityM(transformMatrix, 0);
Matrix.scaleM(
transformMatrix,
0,
2 / (boundary.right() - boundary.left()),
-2 / (boundary.top() - boundary.bottom()),
1.0f);
GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
int numFaces = result.multiFaceLandmarks().size();
for (int i = 0; i < numFaces; ++i) {
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_TESSELATION,
TESSELATION_COLOR,
TESSELATION_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_RIGHT_EYE,
RIGHT_EYE_COLOR,
RIGHT_EYE_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_RIGHT_EYEBROW,
RIGHT_EYEBROW_COLOR,
RIGHT_EYEBROW_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_EYE,
LEFT_EYE_COLOR,
LEFT_EYE_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
LEFT_EYEBROW_COLOR,
LEFT_EYEBROW_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_FACE_OVAL,
FACE_OVAL_COLOR,
FACE_OVAL_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LIPS,
LIPS_COLOR,
LIPS_THICKNESS);
}
}
/**
* Deletes the shader program.
*
* <p>This is only necessary if one wants to release the program while keeping the context around.
*/
public void release() {
GLES20.glDeleteProgram(program);
}
private void drawLandmarks(
List<NormalizedLandmark> faceLandmarkList,
ImmutableSet<FaceMeshConnections.Connection> connections,
float[] colorArray,
int thickness) {
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
GLES20.glLineWidth(thickness);
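// Each connection is drawn as a single two-point line segment: the start and end
// landmark coordinates are packed as (x0, y0, x1, y1) into a direct FloatBuffer.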
for (FaceMeshConnections.Connection c : connections) {
float[] vertex = new float[4];
NormalizedLandmark start = faceLandmarkList.get(c.start());
vertex[0] = normalizedLandmarkValue(start.getX());
vertex[1] = normalizedLandmarkValue(start.getY());
NormalizedLandmark end = faceLandmarkList.get(c.end());
vertex[2] = normalizedLandmarkValue(end.getX());
vertex[3] = normalizedLandmarkValue(end.getY());
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertex.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(vertex);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
}
}
// Normalizes the value from the landmark value range:[0, 1] to the standard OpenGL coordinate
// value range: [-1, 1].
private float normalizedLandmarkValue(float value) {
return value * 2 - 1;
}
}

View File

@ -0,0 +1,158 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.examples.facemesh;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Matrix;
import android.graphics.Paint;
import androidx.appcompat.widget.AppCompatImageView;
import android.util.Size;
import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.util.List;
/** An ImageView implementation for displaying MediaPipe FaceMesh results. */
public class FaceMeshResultImageView extends AppCompatImageView {
private static final String TAG = "FaceMeshResultImageView";
private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
private static final int TESSELATION_THICKNESS = 5;
private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
private static final int RIGHT_EYE_THICKNESS = 8;
private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
private static final int RIGHT_EYEBROW_THICKNESS = 8;
private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
private static final int LEFT_EYE_THICKNESS = 8;
private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
private static final int LEFT_EYEBROW_THICKNESS = 8;
private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
private static final int FACE_OVAL_THICKNESS = 8;
private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
private static final int LIPS_THICKNESS = 8;
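// Holds the most recently rendered output bitmap: setFaceMeshResult() draws into it,
// and update() displays it via setImageBitmap().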
private Bitmap latest;
public FaceMeshResultImageView(Context context) {
super(context);
setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
}
/**
* Sets a {@link FaceMeshResult} to render.
*
* @param result a {@link FaceMeshResult} object that contains the solution outputs and the input
* {@link Bitmap}.
*/
public void setFaceMeshResult(FaceMeshResult result) {
if (result == null) {
return;
}
Bitmap bmInput = result.inputBitmap();
int width = bmInput.getWidth();
int height = bmInput.getHeight();
latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
Canvas canvas = new Canvas(latest);
Size imageSize = new Size(width, height);
canvas.drawBitmap(bmInput, new Matrix(), null);
int numFaces = result.multiFaceLandmarks().size();
for (int i = 0; i < numFaces; ++i) {
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_TESSELATION,
imageSize,
TESSELATION_COLOR,
TESSELATION_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_RIGHT_EYE,
imageSize,
RIGHT_EYE_COLOR,
RIGHT_EYE_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_RIGHT_EYEBROW,
imageSize,
RIGHT_EYEBROW_COLOR,
RIGHT_EYEBROW_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_EYE,
imageSize,
LEFT_EYE_COLOR,
LEFT_EYE_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
imageSize,
LEFT_EYEBROW_COLOR,
LEFT_EYEBROW_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_FACE_OVAL,
imageSize,
FACE_OVAL_COLOR,
FACE_OVAL_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LIPS,
imageSize,
LIPS_COLOR,
LIPS_THICKNESS);
}
}
/** Updates the image view with the latest facemesh result. */
public void update() {
postInvalidate();
if (latest != null) {
setImageBitmap(latest);
}
}
// TODO: Better face landmark and face connection drawing.
private void drawLandmarksOnCanvas(
Canvas canvas,
List<NormalizedLandmark> faceLandmarkList,
ImmutableSet<FaceMeshConnections.Connection> connections,
Size imageSize,
int color,
int thickness) {
// Draw connections.
for (FaceMeshConnections.Connection c : connections) {
Paint connectionPaint = new Paint();
connectionPaint.setColor(color);
connectionPaint.setStrokeWidth(thickness);
NormalizedLandmark start = faceLandmarkList.get(c.start());
NormalizedLandmark end = faceLandmarkList.get(c.end());
canvas.drawLine(
start.getX() * imageSize.getWidth(),
start.getY() * imageSize.getHeight(),
end.getX() * imageSize.getWidth(),
end.getY() * imageSize.getHeight(),
connectionPaint);
}
}
}

View File

@ -0,0 +1,308 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.examples.facemesh;
import android.content.Intent;
import android.graphics.Bitmap;
import android.os.Bundle;
import android.provider.MediaStore;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.FrameLayout;
import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
import com.google.mediapipe.solutioncore.VideoInput;
import com.google.mediapipe.solutions.facemesh.FaceMesh;
import com.google.mediapipe.solutions.facemesh.FaceMeshOptions;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.io.IOException;
/** Main activity of MediaPipe FaceMesh app. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private FaceMesh facemesh;
// Run the pipeline and the model inference on GPU or CPU.
private static final boolean RUN_ON_GPU = true;
private enum InputSource {
UNKNOWN,
IMAGE,
VIDEO,
CAMERA,
}
private InputSource inputSource = InputSource.UNKNOWN;
// Image demo UI and image loader components.
private ActivityResultLauncher<Intent> imageGetter;
private FaceMeshResultImageView imageView;
// Video demo UI and video loader components.
private VideoInput videoInput;
private ActivityResultLauncher<Intent> videoGetter;
// Live camera demo UI and camera components.
private CameraInput cameraInput;
private SolutionGlSurfaceView<FaceMeshResult> glSurfaceView;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
setupStaticImageDemoUiComponents();
setupVideoDemoUiComponents();
setupLiveDemoUiComponents();
}
@Override
protected void onResume() {
super.onResume();
if (inputSource == InputSource.CAMERA) {
// Restarts the camera and the opengl surface rendering.
cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
glSurfaceView.post(this::startCamera);
glSurfaceView.setVisibility(View.VISIBLE);
} else if (inputSource == InputSource.VIDEO) {
videoInput.resume();
}
}
@Override
protected void onPause() {
super.onPause();
if (inputSource == InputSource.CAMERA) {
glSurfaceView.setVisibility(View.GONE);
cameraInput.close();
} else if (inputSource == InputSource.VIDEO) {
videoInput.pause();
}
}
/** Sets up the UI components for the static image demo. */
private void setupStaticImageDemoUiComponents() {
// The Intent to access gallery and read images as bitmap.
imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
facemesh.send(bitmap);
}
}
}
});
Button loadImageButton = findViewById(R.id.button_load_picture);
loadImageButton.setOnClickListener(
v -> {
if (inputSource != InputSource.IMAGE) {
stopCurrentPipeline();
setupStaticImageModePipeline();
}
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
});
imageView = new FaceMeshResultImageView(this);
}
/** The core MediaPipe FaceMesh setup workflow for its static image mode. */
private void setupStaticImageModePipeline() {
this.inputSource = InputSource.IMAGE;
// Initializes a new MediaPipe FaceMesh instance in the static image mode.
facemesh =
new FaceMesh(
this,
FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STATIC_IMAGE_MODE)
.setRunOnGpu(RUN_ON_GPU)
.build());
// Connects MediaPipe FaceMesh to the user-defined FaceMeshResultImageView.
facemesh.setResultListener(
faceMeshResult -> {
logNoseLandmark(faceMeshResult, /*showPixelValues=*/ true);
imageView.setFaceMeshResult(faceMeshResult);
runOnUiThread(() -> imageView.update());
});
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
// Updates the preview layout.
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
frameLayout.removeAllViewsInLayout();
imageView.setImageDrawable(null);
frameLayout.addView(imageView);
imageView.setVisibility(View.VISIBLE);
}
/** Sets up the UI components for the video demo. */
private void setupVideoDemoUiComponents() {
// The Intent to access gallery and read a video file.
videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
facemesh.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Button loadVideoButton = findViewById(R.id.button_load_video);
loadVideoButton.setOnClickListener(
v -> {
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.VIDEO);
// Reads video from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
});
}
/** Sets up the UI components for the live demo with camera input. */
private void setupLiveDemoUiComponents() {
Button startCameraButton = findViewById(R.id.button_start_camera);
startCameraButton.setOnClickListener(
v -> {
if (inputSource == InputSource.CAMERA) {
return;
}
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.CAMERA);
});
}
/** The core MediaPipe FaceMesh setup workflow for its streaming mode. */
private void setupStreamingModePipeline(InputSource inputSource) {
this.inputSource = inputSource;
// Initializes a new MediaPipe FaceMesh instance in the streaming mode.
facemesh =
new FaceMesh(
this,
FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STREAMING_MODE)
.setRunOnGpu(RUN_ON_GPU)
.build());
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
if (inputSource == InputSource.CAMERA) {
// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
} else if (inputSource == InputSource.VIDEO) {
// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
videoInput = new VideoInput(this);
videoInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
}
// Initializes a new Gl surface view with a user-defined FaceMeshResultGlRenderer.
glSurfaceView =
new SolutionGlSurfaceView<>(this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
facemesh.setResultListener(
faceMeshResult -> {
logNoseLandmark(faceMeshResult, /*showPixelValues=*/ false);
glSurfaceView.setRenderData(faceMeshResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the gl surface view is attached.
// For video input source, videoInput.start() will be called when the video uri is available.
if (inputSource == InputSource.CAMERA) {
glSurfaceView.post(this::startCamera);
}
// Updates the preview layout.
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
imageView.setVisibility(View.GONE);
frameLayout.removeAllViewsInLayout();
frameLayout.addView(glSurfaceView);
glSurfaceView.setVisibility(View.VISIBLE);
frameLayout.requestLayout();
}
private void startCamera() {
cameraInput.start(
this,
facemesh.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight());
}
private void stopCurrentPipeline() {
if (cameraInput != null) {
cameraInput.setNewFrameListener(null);
cameraInput.close();
}
if (videoInput != null) {
videoInput.setNewFrameListener(null);
videoInput.close();
}
if (glSurfaceView != null) {
glSurfaceView.setVisibility(View.GONE);
}
if (facemesh != null) {
facemesh.close();
}
}
private void logNoseLandmark(FaceMeshResult result, boolean showPixelValues) {
if (result == null || result.multiFaceLandmarks().isEmpty()) {
return;
}
NormalizedLandmark noseLandmark = result.multiFaceLandmarks().get(0).getLandmarkList().get(1);
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
if (showPixelValues) {
int width = result.inputBitmap().getWidth();
int height = result.inputBitmap().getHeight();
Log.i(
TAG,
String.format(
"MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
noseLandmark.getX() * width, noseLandmark.getY() * height));
} else {
Log.i(
TAG,
String.format(
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
}
}
}

View File

@ -0,0 +1 @@
../../../res

View File

@ -19,7 +19,8 @@
android:roundIcon="@mipmap/ic_launcher_round" android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true" android:supportsRtl="true"
android:theme="@style/AppTheme"> android:theme="@style/AppTheme">
<activity android:name=".MainActivity"> <activity android:name=".MainActivity"
android:screenOrientation="portrait">
<intent-filter> <intent-filter>
<action android:name="android.intent.action.MAIN" /> <action android:name="android.intent.action.MAIN" />

View File

@ -31,10 +31,14 @@ android_binary(
"//mediapipe/java/com/google/mediapipe/solutioncore:camera_input", "//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
"//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib", "//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering", "//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
"//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
"//mediapipe/java/com/google/mediapipe/solutions/hands", "//mediapipe/java/com/google/mediapipe/solutions/hands",
"//third_party:androidx_appcompat", "//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout", "//third_party:androidx_constraint_layout",
"//third_party:opencv",
"@maven//:androidx_activity_activity",
"@maven//:androidx_concurrent_concurrent_futures", "@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_fragment_fragment",
"@maven//:com_google_guava_guava", "@maven//:com_google_guava_guava",
], ],
) )

View File

@ -46,7 +46,6 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
private int positionHandle; private int positionHandle;
private int transformMatrixHandle; private int transformMatrixHandle;
private final float[] transformMatrix = new float[16]; private final float[] transformMatrix = new float[16];
private FloatBuffer vertexBuffer;
private int loadShader(int type, String shaderCode) { private int loadShader(int type, String shaderCode) {
int shader = GLES20.glCreateShader(type); int shader = GLES20.glCreateShader(type);
@ -74,12 +73,15 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
} }
GLES20.glUseProgram(program); GLES20.glUseProgram(program);
// Sets the transform matrix to align the result rendering with the scaled output texture. // Sets the transform matrix to align the result rendering with the scaled output texture.
// Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
// bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
// top-left corner.
Matrix.setIdentityM(transformMatrix, 0); Matrix.setIdentityM(transformMatrix, 0);
Matrix.scaleM( Matrix.scaleM(
transformMatrix, transformMatrix,
0, 0,
2 / (boundary.right() - boundary.left()), 2 / (boundary.right() - boundary.left()),
2 / (boundary.top() - boundary.bottom()), -2 / (boundary.top() - boundary.bottom()),
1.0f); 1.0f);
GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0); GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
GLES20.glLineWidth(CONNECTION_THICKNESS); GLES20.glLineWidth(CONNECTION_THICKNESS);
@ -109,7 +111,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
NormalizedLandmark end = handLandmarkList.get(c.end()); NormalizedLandmark end = handLandmarkList.get(c.end());
vertex[2] = normalizedLandmarkValue(end.getX()); vertex[2] = normalizedLandmarkValue(end.getX());
vertex[3] = normalizedLandmarkValue(end.getY()); vertex[3] = normalizedLandmarkValue(end.getY());
vertexBuffer = FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertex.length * 4) ByteBuffer.allocateDirect(vertex.length * 4)
.order(ByteOrder.nativeOrder()) .order(ByteOrder.nativeOrder())
.asFloatBuffer() .asFloatBuffer()

View File

@ -20,7 +20,7 @@ import android.graphics.Canvas;
import android.graphics.Color; import android.graphics.Color;
import android.graphics.Matrix; import android.graphics.Matrix;
import android.graphics.Paint; import android.graphics.Paint;
import android.widget.ImageView; import androidx.appcompat.widget.AppCompatImageView;
import com.google.mediapipe.formats.proto.LandmarkProto; import com.google.mediapipe.formats.proto.LandmarkProto;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.hands.Hands; import com.google.mediapipe.solutions.hands.Hands;
@ -28,17 +28,18 @@ import com.google.mediapipe.solutions.hands.HandsResult;
import java.util.List; import java.util.List;
/** An ImageView implementation for displaying MediaPipe Hands results. */ /** An ImageView implementation for displaying MediaPipe Hands results. */
public class HandsResultImageView extends ImageView { public class HandsResultImageView extends AppCompatImageView {
private static final String TAG = "HandsResultImageView"; private static final String TAG = "HandsResultImageView";
private static final int LANDMARK_COLOR = Color.RED; private static final int LANDMARK_COLOR = Color.RED;
private static final int LANDMARK_RADIUS = 15; private static final int LANDMARK_RADIUS = 15;
private static final int CONNECTION_COLOR = Color.GREEN; private static final int CONNECTION_COLOR = Color.GREEN;
private static final int CONNECTION_THICKNESS = 10; private static final int CONNECTION_THICKNESS = 10;
private Bitmap latest;
public HandsResultImageView(Context context) { public HandsResultImageView(Context context) {
super(context); super(context);
setScaleType(ImageView.ScaleType.FIT_CENTER); setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
} }
/** /**
@ -54,8 +55,8 @@ public class HandsResultImageView extends ImageView {
Bitmap bmInput = result.inputBitmap(); Bitmap bmInput = result.inputBitmap();
int width = bmInput.getWidth(); int width = bmInput.getWidth();
int height = bmInput.getHeight(); int height = bmInput.getHeight();
Bitmap bmOutput = Bitmap.createBitmap(width, height, bmInput.getConfig()); latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
Canvas canvas = new Canvas(bmOutput); Canvas canvas = new Canvas(latest);
canvas.drawBitmap(bmInput, new Matrix(), null); canvas.drawBitmap(bmInput, new Matrix(), null);
int numHands = result.multiHandLandmarks().size(); int numHands = result.multiHandLandmarks().size();
@ -63,8 +64,14 @@ public class HandsResultImageView extends ImageView {
drawLandmarksOnCanvas( drawLandmarksOnCanvas(
result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height); result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
} }
}
/** Updates the image view with the latest hands result. */
public void update() {
postInvalidate(); postInvalidate();
setImageBitmap(bmOutput); if (latest != null) {
setImageBitmap(latest);
}
} }
// TODO: Better hand landmark and hand connection drawing. // TODO: Better hand landmark and hand connection drawing.

View File

@ -28,6 +28,7 @@ import androidx.activity.result.contract.ActivityResultContracts;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.CameraInput; import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView; import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
import com.google.mediapipe.solutioncore.VideoInput;
import com.google.mediapipe.solutions.hands.HandLandmark; import com.google.mediapipe.solutions.hands.HandLandmark;
import com.google.mediapipe.solutions.hands.Hands; import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsOptions; import com.google.mediapipe.solutions.hands.HandsOptions;
@ -39,14 +40,24 @@ public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity"; private static final String TAG = "MainActivity";
private Hands hands; private Hands hands;
private int mode = HandsOptions.STATIC_IMAGE_MODE; // Run the pipeline and the model inference on GPU or CPU.
private static final boolean RUN_ON_GPU = true;
private enum InputSource {
UNKNOWN,
IMAGE,
VIDEO,
CAMERA,
}
private InputSource inputSource = InputSource.UNKNOWN;
// Image demo UI and image loader components. // Image demo UI and image loader components.
private Button loadImageButton;
private ActivityResultLauncher<Intent> imageGetter; private ActivityResultLauncher<Intent> imageGetter;
private HandsResultImageView imageView; private HandsResultImageView imageView;
// Video demo UI and video loader components.
private VideoInput videoInput;
private ActivityResultLauncher<Intent> videoGetter;
// Live camera demo UI and camera components. // Live camera demo UI and camera components.
private Button startCameraButton;
private CameraInput cameraInput; private CameraInput cameraInput;
private SolutionGlSurfaceView<HandsResult> glSurfaceView; private SolutionGlSurfaceView<HandsResult> glSurfaceView;
@ -55,26 +66,32 @@ public class MainActivity extends AppCompatActivity {
super.onCreate(savedInstanceState); super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main); setContentView(R.layout.activity_main);
setupStaticImageDemoUiComponents(); setupStaticImageDemoUiComponents();
setupVideoDemoUiComponents();
setupLiveDemoUiComponents(); setupLiveDemoUiComponents();
} }
@Override @Override
protected void onResume() { protected void onResume() {
super.onResume(); super.onResume();
if (mode == HandsOptions.STREAMING_MODE) { if (inputSource == InputSource.CAMERA) {
// Restarts the camera and the opengl surface rendering. // Restarts the camera and the opengl surface rendering.
cameraInput = new CameraInput(this); cameraInput = new CameraInput(this);
cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame)); cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
glSurfaceView.post(this::startCamera); glSurfaceView.post(this::startCamera);
glSurfaceView.setVisibility(View.VISIBLE); glSurfaceView.setVisibility(View.VISIBLE);
} else if (inputSource == InputSource.VIDEO) {
videoInput.resume();
} }
} }
@Override @Override
protected void onPause() { protected void onPause() {
super.onPause(); super.onPause();
if (mode == HandsOptions.STREAMING_MODE) { if (inputSource == InputSource.CAMERA) {
stopLiveDemo(); glSurfaceView.setVisibility(View.GONE);
cameraInput.close();
} else if (inputSource == InputSource.VIDEO) {
videoInput.pause();
} }
} }
@ -102,80 +119,122 @@ public class MainActivity extends AppCompatActivity {
} }
} }
}); });
loadImageButton = (Button) findViewById(R.id.button_load_picture); Button loadImageButton = findViewById(R.id.button_load_picture);
loadImageButton.setOnClickListener( loadImageButton.setOnClickListener(
new View.OnClickListener() { v -> {
@Override if (inputSource != InputSource.IMAGE) {
public void onClick(View v) { stopCurrentPipeline();
if (mode == HandsOptions.STREAMING_MODE) { setupStaticImageModePipeline();
stopLiveDemo();
}
if (hands == null || mode != HandsOptions.STATIC_IMAGE_MODE) {
setupStaticImageModePipeline();
}
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
} }
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
}); });
imageView = new HandsResultImageView(this); imageView = new HandsResultImageView(this);
} }
/** The core MediaPipe Hands setup workflow for its static image mode. */ /** The core MediaPipe Hands setup workflow for its static image mode. */
private void setupStaticImageModePipeline() { private void setupStaticImageModePipeline() {
this.inputSource = InputSource.IMAGE;
// Initializes a new MediaPipe Hands instance in the static image mode. // Initializes a new MediaPipe Hands instance in the static image mode.
mode = HandsOptions.STATIC_IMAGE_MODE; hands =
if (hands != null) { new Hands(
hands.close(); this,
} HandsOptions.builder()
hands = new Hands(this, HandsOptions.builder().setMode(mode).build()); .setMode(HandsOptions.STATIC_IMAGE_MODE)
.setMaxNumHands(1)
.setRunOnGpu(RUN_ON_GPU)
.build());
// Connects MediaPipe Hands to the user-defined HandsResultImageView. // Connects MediaPipe Hands to the user-defined HandsResultImageView.
hands.setResultListener( hands.setResultListener(
handsResult -> { handsResult -> {
logWristLandmark(handsResult, /*showPixelValues=*/ true); logWristLandmark(handsResult, /*showPixelValues=*/ true);
runOnUiThread(() -> imageView.setHandsResult(handsResult)); imageView.setHandsResult(handsResult);
runOnUiThread(() -> imageView.update());
}); });
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message)); hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
// Updates the preview layout. // Updates the preview layout.
FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout); FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
frameLayout.removeAllViewsInLayout(); frameLayout.removeAllViewsInLayout();
imageView.setImageDrawable(null); imageView.setImageDrawable(null);
frameLayout.addView(imageView); frameLayout.addView(imageView);
imageView.setVisibility(View.VISIBLE); imageView.setVisibility(View.VISIBLE);
} }
/** Sets up the UI components for the video demo. */
private void setupVideoDemoUiComponents() {
// The Intent to access gallery and read a video file.
videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
hands.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Button loadVideoButton = findViewById(R.id.button_load_video);
loadVideoButton.setOnClickListener(
v -> {
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.VIDEO);
// Reads video from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
});
}
/** Sets up the UI components for the live demo with camera input. */ /** Sets up the UI components for the live demo with camera input. */
private void setupLiveDemoUiComponents() { private void setupLiveDemoUiComponents() {
startCameraButton = (Button) findViewById(R.id.button_start_camera); Button startCameraButton = findViewById(R.id.button_start_camera);
startCameraButton.setOnClickListener( startCameraButton.setOnClickListener(
new View.OnClickListener() { v -> {
@Override if (inputSource == InputSource.CAMERA) {
public void onClick(View v) { return;
if (hands == null || mode != HandsOptions.STREAMING_MODE) {
setupStreamingModePipeline();
}
} }
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.CAMERA);
}); });
} }
/** The core MediaPipe Hands setup workflow for its streaming mode. */ /** The core MediaPipe Hands setup workflow for its streaming mode. */
private void setupStreamingModePipeline() { private void setupStreamingModePipeline(InputSource inputSource) {
this.inputSource = inputSource;
// Initializes a new MediaPipe Hands instance in the streaming mode. // Initializes a new MediaPipe Hands instance in the streaming mode.
mode = HandsOptions.STREAMING_MODE; hands =
if (hands != null) { new Hands(
hands.close(); this,
HandsOptions.builder()
.setMode(HandsOptions.STREAMING_MODE)
.setMaxNumHands(1)
.setRunOnGpu(RUN_ON_GPU)
.build());
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
if (inputSource == InputSource.CAMERA) {
// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
} else if (inputSource == InputSource.VIDEO) {
// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
videoInput = new VideoInput(this);
videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
} }
hands = new Hands(this, HandsOptions.builder().setMode(mode).build());
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe Hands. // Initializes a new Gl surface view with a user-defined HandsResultGlRenderer.
cameraInput = new CameraInput(this);
cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame));
// Initalizes a new Gl surface view with a user-defined HandsResultGlRenderer.
glSurfaceView = glSurfaceView =
new SolutionGlSurfaceView<>(this, hands.getGlContext(), hands.getGlMajorVersion()); new SolutionGlSurfaceView<>(this, hands.getGlContext(), hands.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer()); glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());
@ -188,10 +247,13 @@ public class MainActivity extends AppCompatActivity {
}); });
// The runnable to start camera after the gl surface view is attached. // The runnable to start camera after the gl surface view is attached.
glSurfaceView.post(this::startCamera); // For video input source, videoInput.start() will be called when the video uri is available.
if (inputSource == InputSource.CAMERA) {
glSurfaceView.post(this::startCamera);
}
// Updates the preview layout. // Updates the preview layout.
FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout); FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
imageView.setVisibility(View.GONE); imageView.setVisibility(View.GONE);
frameLayout.removeAllViewsInLayout(); frameLayout.removeAllViewsInLayout();
frameLayout.addView(glSurfaceView); frameLayout.addView(glSurfaceView);
@ -208,34 +270,40 @@ public class MainActivity extends AppCompatActivity {
glSurfaceView.getHeight()); glSurfaceView.getHeight());
} }
private void stopLiveDemo() { private void stopCurrentPipeline() {
if (cameraInput != null) { if (cameraInput != null) {
cameraInput.stop(); cameraInput.setNewFrameListener(null);
cameraInput.close();
}
if (videoInput != null) {
videoInput.setNewFrameListener(null);
videoInput.close();
} }
if (glSurfaceView != null) { if (glSurfaceView != null) {
glSurfaceView.setVisibility(View.GONE); glSurfaceView.setVisibility(View.GONE);
} }
if (hands != null) {
hands.close();
}
} }
private void logWristLandmark(HandsResult result, boolean showPixelValues) { private void logWristLandmark(HandsResult result, boolean showPixelValues) {
NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST); NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
// For Bitmaps, show the pixel values. For texture inputs, show the normoralized cooridanates. // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
if (showPixelValues) { if (showPixelValues) {
int width = result.inputBitmap().getWidth(); int width = result.inputBitmap().getWidth();
int height = result.inputBitmap().getHeight(); int height = result.inputBitmap().getHeight();
Log.i( Log.i(
TAG, TAG,
"MediaPipe Hand wrist coordinates (pixel values): x= " String.format(
+ wristLandmark.getX() * width "MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f",
+ " y=" wristLandmark.getX() * width, wristLandmark.getY() * height));
+ wristLandmark.getY() * height);
} else { } else {
Log.i( Log.i(
TAG, TAG,
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x= " String.format(
+ wristLandmark.getX() "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+ " y=" wristLandmark.getX(), wristLandmark.getY()));
+ wristLandmark.getY());
} }
} }
} }

View File

@ -8,18 +8,23 @@
android:id="@+id/buttons" android:id="@+id/buttons"
android:layout_width="match_parent" android:layout_width="match_parent"
android:layout_height="wrap_content" android:layout_height="wrap_content"
android:gravity="center" style="?android:attr/buttonBarStyle" android:gravity="center"
android:orientation="horizontal"> android:orientation="horizontal">
<Button <Button
android:id="@+id/button_load_picture" android:id="@+id/button_load_picture"
android:layout_width="wrap_content" android:layout_width="wrap_content"
android:layout_height="wrap_content" style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
android:text="Load Picture" /> android:text="@string/load_picture" />
<Button
android:id="@+id/button_load_video"
android:layout_width="wrap_content"
style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
android:text="@string/load_video" />
<Button <Button
android:id="@+id/button_start_camera" android:id="@+id/button_start_camera"
android:layout_width="wrap_content" android:layout_width="wrap_content"
android:layout_height="wrap_content" style="?android:attr/buttonBarButtonStyle" android:layout_height="wrap_content"
android:text="Start Camera" /> android:text="@string/start_camera" />
</LinearLayout> </LinearLayout>
<FrameLayout <FrameLayout
android:id="@+id/preview_display_layout" android:id="@+id/preview_display_layout"
@ -27,9 +32,9 @@
android:layout_height="match_parent"> android:layout_height="match_parent">
<TextView <TextView
android:id="@+id/no_view" android:id="@+id/no_view"
android:layout_width="wrap_content" android:layout_width="match_parent"
android:layout_height="wrap_content" android:layout_height="wrap_content"
android:gravity="center" android:gravity="center"
android:text="Please press any button above to start" /> android:text="@string/instruction" />
</FrameLayout> </FrameLayout>
</LinearLayout> </LinearLayout>

View File

@ -1,3 +1,6 @@
<resources> <resources>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string> <string name="load_picture" translatable="false">Load Picture</string>
<string name="load_video" translatable="false">Load Video</string>
<string name="start_camera" translatable="false">Start Camera</string>
<string name="instruction" translatable="false">Please press any button above to start</string>
</resources> </resources>

View File

@ -1,2 +1,3 @@
rootProject.name = "mediapipe-solutions-examples" rootProject.name = "mediapipe-solutions-examples"
include ':hands' include ':hands'
include ':facemesh'

View File

@ -169,6 +169,7 @@ public class MainActivity extends AppCompatActivity {
public void startCamera() { public void startCamera() {
cameraHelper = new CameraXPreviewHelper(); cameraHelper = new CameraXPreviewHelper();
previewFrameTexture = converter.getSurfaceTexture();
cameraHelper.setOnCameraStartedListener( cameraHelper.setOnCameraStartedListener(
surfaceTexture -> { surfaceTexture -> {
onCameraStarted(surfaceTexture); onCameraStarted(surfaceTexture);
@ -178,7 +179,7 @@ public class MainActivity extends AppCompatActivity {
? CameraHelper.CameraFacing.FRONT ? CameraHelper.CameraFacing.FRONT
: CameraHelper.CameraFacing.BACK; : CameraHelper.CameraFacing.BACK;
cameraHelper.startCamera( cameraHelper.startCamera(
this, cameraFacing, /*unusedSurfaceTexture=*/ null, cameraTargetResolution()); this, cameraFacing, previewFrameTexture, cameraTargetResolution());
} }
protected Size computeViewSize(int width, int height) { protected Size computeViewSize(int width, int height) {
@ -194,11 +195,8 @@ public class MainActivity extends AppCompatActivity {
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize); Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated(); boolean isCameraRotated = cameraHelper.isCameraRotated();
// Connect the converter to the camera-preview frames as its input (via // Configure the output width and height as the computed display size.
// previewFrameTexture), and configure the output width and height as the computed converter.setDestinationSize(
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(), isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight()); isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
} }

View File

@ -43,6 +43,7 @@ cc_library(
cc_binary( cc_binary(
name = "object_detection_tpu", name = "object_detection_tpu",
deps = [ deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/examples/coral:demo_run_graph_main", "//mediapipe/examples/coral:demo_run_graph_main",
"//mediapipe/graphs/object_detection:desktop_tflite_calculators", "//mediapipe/graphs/object_detection:desktop_tflite_calculators",
], ],
@ -51,6 +52,12 @@ cc_binary(
cc_binary( cc_binary(
name = "face_detection_tpu", name = "face_detection_tpu",
deps = [ deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/examples/coral:demo_run_graph_main", "//mediapipe/examples/coral:demo_run_graph_main",
"//mediapipe/graphs/face_detection:desktop_live_calculators", "//mediapipe/graphs/face_detection:desktop_live_calculators",
], ],

View File

@ -1,86 +0,0 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#==== ! Prerequisite ! ====
# $ sh mediapipe/examples/coral/setup.sh
#====
# for opencv 3.2 default
FROM ubuntu:18.04
MAINTAINER <mediapipe@google.com>
WORKDIR /mediapipe
ENV DEBIAN_FRONTEND=noninteractive
# Install MediaPipe & Coral deps
COPY update_sources.sh /
RUN /update_sources.sh
RUN dpkg --add-architecture armhf
RUN dpkg --add-architecture arm64
RUN apt-get update && apt-get install -y \
build-essential \
crossbuild-essential-arm64 \
libusb-1.0-0-dev:arm64 \
zlibc:arm64 \
pkg-config \
zip \
unzip \
curl \
wget \
git \
python \
python-pip \
python3-pip \
python-numpy \
vim-common \
ca-certificates \
emacs \
software-properties-common && \
add-apt-repository -y ppa:openjdk-r/ppa && \
apt-get update && apt-get install -y openjdk-8-jdk
RUN pip install --upgrade setuptools
RUN pip install future
RUN pip3 install six
COPY . /mediapipe/
# Install bazel
# Please match the current MediaPipe Bazel requirements according to docs.
ARG BAZEL_VERSION=3.7.2
RUN mkdir /bazel && \
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
wget --no-check-certificate -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
chmod +x /bazel/installer.sh && \
/bazel/installer.sh && \
rm -f /bazel/installer.sh
# OpenCV (3.2 default in 18.04)
RUN apt-get update && apt-get install -y libopencv-dev
# Opencv libs copied from coral device into opencv32_arm64_libs
RUN cp opencv32_arm64_libs/* /usr/lib/aarch64-linux-gnu/.
# Edge tpu header and lib
RUN git clone https://github.com/google-coral/edgetpu.git /edgetpu
RUN cp /edgetpu/libedgetpu/direct/aarch64/libedgetpu.so.1.0 /usr/lib/aarch64-linux-gnu/libedgetpu.so
# See mediapipe/examples/coral/README.md to finish setup

View File

@ -0,0 +1,45 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM debian:buster
MAINTAINER <mediapipe@google.com>
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
crossbuild-essential-arm64 \
pkg-config \
zip \
unzip \
curl \
wget \
git \
tree \
vim \
sudo \
python3-all \
python3-pip \
python3-numpy \
ca-certificates \
software-properties-common \
libusb-1.0-0-dev \
libopencv-core-dev \
libopencv-imgproc-dev \
libopencv-video-dev \
libopencv-highgui-dev \
libopencv-videoio-dev \
libopencv-contrib-dev
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
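# Install Bazel via the Bazelisk launcher and verify the download against a pinned
# SHA-256 checksum before making it executable.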
RUN wget -O /usr/bin/bazel \
https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" sha256sum --check - && \
chmod +x /usr/bin/bazel
ENV BAZEL_CPU=k8

View File

@ -0,0 +1,47 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM debian:buster
MAINTAINER <mediapipe@google.com>
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
crossbuild-essential-arm64 \
pkg-config \
zip \
unzip \
curl \
wget \
git \
tree \
vim \
sudo \
python3-all \
python3-pip \
python3-numpy \
ca-certificates \
software-properties-common
RUN dpkg --add-architecture arm64
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libusb-1.0-0-dev:arm64 \
libopencv-core-dev:arm64 \
libopencv-imgproc-dev:arm64 \
libopencv-video-dev:arm64 \
libopencv-highgui-dev:arm64 \
libopencv-videoio-dev:arm64 \
libopencv-contrib-dev:arm64
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
RUN wget -O /usr/bin/bazel \
https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" sha256sum --check - && \
chmod +x /usr/bin/bazel
ENV BAZEL_CPU=aarch64

View File

@ -0,0 +1,47 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM debian:buster
MAINTAINER <mediapipe@google.com>
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
crossbuild-essential-armhf \
pkg-config \
zip \
unzip \
curl \
wget \
git \
tree \
vim \
sudo \
python3-all \
python3-pip \
python3-numpy \
ca-certificates \
software-properties-common
RUN dpkg --add-architecture armhf
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libusb-1.0-0-dev:armhf \
libopencv-core-dev:armhf \
libopencv-imgproc-dev:armhf \
libopencv-video-dev:armhf \
libopencv-highgui-dev:armhf \
libopencv-videoio-dev:armhf \
libopencv-contrib-dev:armhf
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 3
RUN wget -O /usr/bin/bazel \
https://github.com/bazelbuild/bazelisk/releases/download/v1.10.0/bazelisk-linux-amd64 && \
echo "038c0990a48ccd69932e4e8ecf8baa459e05a6b4c9e4cc492ac836b777caaf9d /usr/bin/bazel" sha256sum --check - && \
chmod +x /usr/bin/bazel
ENV BAZEL_CPU=armv7a

View File

@ -0,0 +1,55 @@
SHELL := /bin/bash
MAKEFILE_DIR := $(realpath $(dir $(lastword $(MAKEFILE_LIST))))
MEDIAPIPE_DIR := $(MAKEFILE_DIR)/../../..
BAZEL_COMPILATION_MODE ?= opt
BAZEL_TARGET ?= mediapipe/examples/coral:face_detection_tpu
BAZEL_CPU ?= k8
OUT_DIR := $(MEDIAPIPE_DIR)/out/$(BAZEL_CPU)
PLATFORM ?= amd64
DOCKER_FILE ?= $(MAKEFILE_DIR)/Dockerfile.$(PLATFORM)
DOCKER_COMMAND ?=
bazel_output = $(MEDIAPIPE_DIR)/bazel-bin/$(subst :,/,$(1))
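# run_command: runs the given command (or an interactive shell when no command is
# given) inside the container as a user mirroring the host UID/GID, so files created
# in the mounted /mediapipe tree remain owned by the host user.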
define run_command
chmod a+w /; \
groupadd --gid $(shell id -g) $(shell id -g -n); \
useradd -m -e '' -s /bin/bash --gid $(shell id -g) --uid $(shell id -u) $(shell id -u -n); \
echo '$(shell id -u -n) ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers; \
su $(shell id -u -n) $(if $(1),-c '$(1)',)
endef
.PHONY: help
help:
@echo "make help - Print help"
@echo "make docker - Run Docker environment"
@echo "make build - Run Bazel build, use BAZEL_TARGET to choose which target to build"
ifeq (,$(wildcard /.dockerenv))
.PHONY: docker
docker:
docker run --rm -i --tty \
-v $(MEDIAPIPE_DIR):/mediapipe \
--workdir /mediapipe/ \
$(shell docker build -q - < $(DOCKER_FILE)) \
/bin/bash -c "$(call run_command,$(DOCKER_COMMAND))"
endif
.PHONY: build
build:
(cd $(MEDIAPIPE_DIR) && \
bazel build \
--crosstool_top=@crosstool//:toolchains \
--compiler=gcc \
--cpu=${BAZEL_CPU} \
--compilation_mode=${BAZEL_COMPILATION_MODE} \
--define darwinn_portable=1 \
--define MEDIAPIPE_DISABLE_GPU=1 \
--define MEDIAPIPE_EDGE_TPU=all \
$(BAZEL_TARGET) && \
mkdir -p $(OUT_DIR) && \
cp -f $(call bazel_output,$(BAZEL_TARGET)) $(OUT_DIR))

View File

@ -1,156 +1,173 @@
# Coral Dev Board Setup (experimental) # Coral Support
**Disclaimer**: Running MediaPipe on Coral is experimental, and this process may ## Bazel Setup
not be exact and is subject to change. These instructions have only been tested
on the [Coral Dev Board](https://coral.ai/products/dev-board/)
running [Mendel Enterprise Day 13](https://coral.ai/software/) OS and
using [Diploria2](https://github.com/google-coral/edgetpu/tree/diploria2)
edgetpu libs, and may vary for different devices and workstations.
This file describes how to prepare a Coral Dev Board and setup a Linux You can compile MediaPipe with Edge TPU support enabled to run
Docker container for building MediaPipe applications that run on Edge TPU. [Coral models](http://coral.ai/models). Just add
`--define MEDIAPIPE_EDGE_TPU=<type>` to the `bazel` command:
## Before creating the Docker * `--define MEDIAPIPE_EDGE_TPU=usb` for Coral USB devices on Linux and macOS
* `--define MEDIAPIPE_EDGE_TPU=pci` for Coral PCIe devices on Linux
* `--define MEDIAPIPE_EDGE_TPU=all` for both Coral USB and PCIe devices on Linux
* (on host machine) run _setup.sh_ from MediaPipe root directory You have to install `libusb` library in order to compile with USB support:
sh mediapipe/examples/coral/setup.sh * `libusb-1.0-0-dev` on Linux
* `libusb` on macOS via MacPorts or Homebrew
* Setup the coral device via [here](https://coral.withgoogle.com/docs/dev-board/get-started/), and ensure the _mdt_ command works Command to compile face detection Coral example:
Note: alias mdt="python3 -m mdt.main" may be needed on some systems ```bash
bazel build \
--compilation_mode=opt \
--define darwinn_portable=1 \
--define MEDIAPIPE_DISABLE_GPU=1 \
--define MEDIAPIPE_EDGE_TPU=usb \
--linkopt=-l:libusb-1.0.so \
mediapipe/examples/coral:face_detection_tpu
```
* (on coral device) prepare MediaPipe ## Cross-compilation
cd ~ Sometimes you need to cross-compile MediaPipe source code, e.g. get `ARM32`
sudo apt-get update && sudo apt-get install -y git or `ARM64` binaries on an `x86` system. Install a cross-compilation toolchain on
git clone https://github.com/google/mediapipe.git your system or use our preconfigured Docker environment for that:
mkdir mediapipe/bazel-bin
* (on coral device) install opencv 3.2 ```bash
# For ARM32 (e.g. Raspberry Pi)
make -C mediapipe/examples/coral PLATFORM=armhf docker
sudo apt-get update && sudo apt-get install -y libopencv-dev # For ARM64 (e.g. Coral Dev Board)
make -C mediapipe/examples/coral PLATFORM=arm64 docker
```
* (on coral device) find all opencv libs After running this command you'll get a shell to the Docker environment which
has everything ready to start compilation:
find /usr/lib/aarch64-linux-gnu/ -name 'libopencv*so' ```bash
# For ARM32 (e.g. Raspberry Pi)
bazel build \
--crosstool_top=@crosstool//:toolchains \
--compiler=gcc \
--cpu=armv7a \
--define darwinn_portable=1 \
--define MEDIAPIPE_DISABLE_GPU=1 \
--define MEDIAPIPE_EDGE_TPU=usb \
--linkopt=-l:libusb-1.0.so \
mediapipe/examples/coral:face_detection_tpu
* (on host machine) copy core opencv libs from coral device to a local folder inside MediaPipe checkout: # For ARM64 (e.g. Coral Dev Board)
bazel build \
--crosstool_top=@crosstool//:toolchains \
--compiler=gcc \
--cpu=aarch64 \
--define darwinn_portable=1 \
--define MEDIAPIPE_DISABLE_GPU=1 \
--define MEDIAPIPE_EDGE_TPU=usb \
--linkopt=-l:libusb-1.0.so \
mediapipe/examples/coral:face_detection_tpu
```
# in root level mediapipe folder # Our Docker environment defines `${BAZEL_CPU}` value, so you can use it directly:
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_core.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_calib3d.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_features2d.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_highgui.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_imgcodecs.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_imgproc.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_video.so opencv32_arm64_libs
mdt pull /usr/lib/aarch64-linux-gnu/libopencv_videoio.so opencv32_arm64_libs
* (on host machine) Create and start the docker environment ```bash
bazel build \
--crosstool_top=@crosstool//:toolchains \
--compiler=gcc \
--cpu=${BAZEL_CPU} \
--define darwinn_portable=1 \
--define MEDIAPIPE_DISABLE_GPU=1 \
--define MEDIAPIPE_EDGE_TPU=usb \
--linkopt=-l:libusb-1.0.so \
mediapipe/examples/coral:face_detection_tpu
```
# from mediapipe root level directory # The command above is already defined in our `Makefile`, so you can simply run:
docker build -t coral .
docker run -it --name coral coral:latest
## Inside the Docker environment ```bash
make -C mediapipe/examples/coral \
BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu \
build
```
* Update library paths in /mediapipe/third_party/opencv_linux.BUILD The output binary will be automatically copied to the `out/<platform>` directory.
(replace 'x86_64-linux-gnu' with 'aarch64-linux-gnu') You can also run the compilation inside the Docker environment as a single
command:
"lib/aarch64-linux-gnu/libopencv_core.so", ```bash
"lib/aarch64-linux-gnu/libopencv_calib3d.so", make -C mediapipe/examples/coral \
"lib/aarch64-linux-gnu/libopencv_features2d.so", PLATFORM=armhf \
"lib/aarch64-linux-gnu/libopencv_highgui.so", DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
"lib/aarch64-linux-gnu/libopencv_imgcodecs.so", docker
"lib/aarch64-linux-gnu/libopencv_imgproc.so", ```
"lib/aarch64-linux-gnu/libopencv_video.so",
"lib/aarch64-linux-gnu/libopencv_videoio.so",
* Attempt to build hello world (to download external deps) and get the output binary from the `out/<platform>` directory. Any MediaPipe target
can be cross-compiled this way, e.g. try
`mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu`.
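For instance, reusing the single-command Docker flow shown above, the hand tracking target can be cross-compiled for ARM64 by changing only the `PLATFORM` and `BAZEL_TARGET` values (a sketch following the same pattern as the face detection example):

```bash
make -C mediapipe/examples/coral \
  PLATFORM=arm64 \
  DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu build" \
  docker
```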
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hello_world:hello_world To summarize everything:
* Edit /edgetpu/libedgetpu/BUILD | Arch | PLATFORM | Output | Board |
| ----- | -------------- | ----------- | -------------------------------------------------------- |
| ARM32 | PLATFORM=armhf | out/armv7a | [Raspberry Pi](https://www.raspberrypi.org/products/) |
| ARM64 | PLATFORM=arm64 | out/aarch64 | [Coral Dev Board](https://coral.ai/products/dev-board/) |
to add this build target ## Coral Examples
cc_library( There are two Coral examples in the `mediapipe/examples/coral` directory. Compile
name = "lib", them for your platform:
srcs = [
"libedgetpu.so",
],
visibility = ["//visibility:public"],
)
* Edit /edgetpu/WORKSPACE ```bash
# Face detection
make -C mediapipe/examples/coral \
PLATFORM=armhf \
DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:face_detection_tpu build" \
docker
update /mediapipe/WORKSPACE TENSORFLOW_* variables to match what /edgetpu/WORKSPACE has: # Object detection
make -C mediapipe/examples/coral \
PLATFORM=armhf \
DOCKER_COMMAND="make -C mediapipe/examples/coral BAZEL_TARGET=mediapipe/examples/coral:object_detection_tpu build" \
docker
```
grep TENSORFLOW_ /mediapipe/WORKSPACE Copy output binaries along with corresponding auxiliary files to your target
grep TENSORFLOW_ /edgetpu/WORKSPACE system. You can copy the whole `mediapipe` folder for simplicity:
# Make sure the /mediapipe/WORKSPACE _TENSORFLOW_GIT_COMMIT and _TENSORFLOW_SHA256 ```bash
# match the /edgetpu/WORKSPACE TENSORFLOW_COMMIT and TENSORFLOW_SHA256 respectively. scp -r mediapipe <user>@<host>:.
```
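If copying the whole checkout is too heavy, a leaner alternative is to copy only the built binary and the graph files (a sketch; any TFLite model files referenced by the chosen graph must also be present on the target at the paths the graph expects, which is why copying the whole folder is the simpler option):

```bash
# Recreate the expected directory layout on the target, then copy selectively
ssh <user>@<host> 'mkdir -p mediapipe/mediapipe/examples/coral'
scp out/aarch64/face_detection_tpu <user>@<host>:mediapipe/
scp -r mediapipe/examples/coral/graphs <user>@<host>:mediapipe/mediapipe/examples/coral/
```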
# If they do not match, modify /mediapipe/WORKSPACE to match what /edgetpu/WORKSPACE has. OpenCV runtime libraries need to be installed on your target system:
# Also comment out the MediaPipe org_tensorflow patch section.
* Edit /mediapipe/mediapipe/calculators/tflite/BUILD to change rules for *tflite_inference_calculator.cc* ```bash
sudo apt-get install -y \
libopencv-core-dev \
libopencv-highgui-dev \
libopencv-calib3d-dev \
libopencv-features2d-dev \
libopencv-imgproc-dev \
libopencv-video-dev
```
sed -i 's/\":tflite_inference_calculator_cc_proto\",/\":tflite_inference_calculator_cc_proto\",\n\t\"@edgetpu\/\/:header\",\n\t\"@libedgetpu\/\/:lib\",/g' /mediapipe/mediapipe/calculators/tflite/BUILD If you are going to connect Coral USB accelerator to your target system then
you'll also need `libusb` library:
The above command should add ```shell
sudo apt-get install -y \
libusb-1.0-0
```
"@edgetpu//:header", Connect USB camera and Coral device to your target system and run the copied
"@libedgetpu//:lib", binaries:
to the _deps_ of tflite_inference_calculator.cc ```bash
# Face Detection
Now also remove XNNPACK deps: GLOG_logtostderr=1 ./face_detection_tpu --calculator_graph_config_file \
mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
sed -i 's/\"@org_tensorflow\/\/tensorflow\/lite\/delegates\/xnnpack/#\"@org_tensorflow\/\/tensorflow\/lite\/delegates\/xnnpack/g' /mediapipe/mediapipe/calculators/tflite/BUILD
#### Now try cross-compiling for device
* Object detection demo
![Object Detection running on Coral](./images/object_detection_demo_coral.jpg)
bazel build -c opt --crosstool_top=@crosstool//:toolchains --compiler=gcc --cpu=aarch64 --define MEDIAPIPE_DISABLE_GPU=1 --copt -DMEDIAPIPE_EDGE_TPU --copt=-flax-vector-conversions mediapipe/examples/coral:object_detection_tpu
Copy object_detection_tpu binary to the MediaPipe checkout on the coral device
# outside docker env, open new terminal on host machine #
docker ps
docker cp <container-id>:/mediapipe/bazel-bin/mediapipe/examples/coral/object_detection_tpu /tmp/.
mdt push /tmp/object_detection_tpu /home/mendel/mediapipe/bazel-bin/.
* Face detection demo
![Face Detection running on Coral](./images/face_detection_demo_coral.gif)
bazel build -c opt --crosstool_top=@crosstool//:toolchains --compiler=gcc --cpu=aarch64 --define MEDIAPIPE_DISABLE_GPU=1 --copt -DMEDIAPIPE_EDGE_TPU --copt=-flax-vector-conversions mediapipe/examples/coral:face_detection_tpu
Copy face_detection_tpu binary to the MediaPipe checkout on the coral device
# outside docker env, open new terminal on host machine #
docker ps
docker cp <container-id>:/mediapipe/bazel-bin/mediapipe/examples/coral/face_detection_tpu /tmp/.
mdt push /tmp/face_detection_tpu /home/mendel/mediapipe/bazel-bin/.
## On the coral device (with display)
# Object detection
cd ~/mediapipe
chmod +x bazel-bin/object_detection_tpu
export GLOG_logtostderr=1
bazel-bin/object_detection_tpu --calculator_graph_config_file=mediapipe/examples/coral/graphs/object_detection_desktop_live.pbtxt
# Face detection
cd ~/mediapipe
chmod +x bazel-bin/face_detection_tpu
export GLOG_logtostderr=1
bazel-bin/face_detection_tpu --calculator_graph_config_file=mediapipe/examples/coral/graphs/face_detection_desktop_live.pbtxt
# Object Detection
GLOG_logtostderr=1 ./object_detection_tpu --calculator_graph_config_file \
mediapipe/examples/coral/graphs/object_detection_desktop_live.pbtxt
```
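If a binary starts but reports that no Edge TPU is available, first check that the accelerator is visible to the system (a troubleshooting sketch; the exact vendor string depends on the device and on whether it has already served an inference):

```bash
# A Coral USB accelerator typically enumerates as "Global Unichip Corp." or "Google Inc."
lsusb

# A Coral PCIe module typically exposes an apex device node
ls /dev/apex_0
```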

View File

@ -1,30 +0,0 @@
### Coral additions to MediaPipe WORKSPACE ###
#COMMIT=$(git ls-remote https://github.com/google-coral/crosstool master | awk '{print $1}')
#SHA256=$(curl -L "https://github.com/google-coral/crosstool/archive/${COMMIT}.tar.gz" | sha256sum | awk '{print $1}')
# Oct 2019
#COMMIT=9e00d5be43bf001f883b5700f5d04882fea00229
#SHA256=cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb
http_archive(
name = "coral_crosstool",
sha256 = "cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb",
strip_prefix = "crosstool-9e00d5be43bf001f883b5700f5d04882fea00229",
urls = [
"https://github.com/google-coral/crosstool/archive/9e00d5be43bf001f883b5700f5d04882fea00229.tar.gz",
],
)
load("@coral_crosstool//:configure.bzl", "cc_crosstool")
cc_crosstool(name = "crosstool")
# EdgeTPU
new_local_repository(
name = "edgetpu",
path = "/edgetpu/libedgetpu",
build_file = "/edgetpu/libedgetpu/BUILD"
)
new_local_repository(
name = "libedgetpu",
path = "/usr/lib/aarch64-linux-gnu",
build_file = "/edgetpu/libedgetpu/BUILD"
)

View File

@ -74,43 +74,12 @@ node {
} }
} }
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 300
input_size_width: 300
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on # Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of # the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object. # detections. Each detection describes a detected object.
node { node {
calculator: "TfLiteTensorsToDetectionsCalculator" calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors" input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections" output_stream: "DETECTIONS:detections"
options: { options: {
[mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] { [mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] {

View File

@ -1,34 +0,0 @@
#!/bin/sh
set -e
set -v
echo 'Please run this from root level mediapipe directory! \n Ex:'
echo ' sh mediapipe/examples/coral/setup.sh '
sleep 3
mkdir -p opencv32_arm64_libs
# prepare docker aux script
cp mediapipe/examples/coral/update_sources.sh update_sources.sh
chmod +x update_sources.sh
# backup non-coral Dockerfile
mv Dockerfile Dockerfile.orig
cp mediapipe/examples/coral/Dockerfile Dockerfile
# backup non-coral workspace
cp WORKSPACE WORKSPACE.orig
# create temps
cp WORKSPACE WORKSPACE.1
cp mediapipe/examples/coral/WORKSPACE.coral WORKSPACE.2
# merge (shell decides concat order, unless numbered appropriately)
cat WORKSPACE.1 WORKSPACE.2 > WORKSPACE
# cleanup
rm WORKSPACE.1 WORKSPACE.2
echo 'done'

View File

@ -1,11 +0,0 @@
#!/bin/bash
# To run in the Coral Docker environment.
. /etc/os-release
sed -i "s/deb\ /deb \[arch=amd64\]\ /g" /etc/apt/sources.list
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME} main universe" >> /etc/apt/sources.list
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME}-updates main universe" >> /etc/apt/sources.list
echo "deb [arch=arm64,armhf] http://ports.ubuntu.com/ubuntu-ports ${UBUNTU_CODENAME}-security main universe" >> /etc/apt/sources.list

View File

@ -17,8 +17,10 @@ load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library"
licenses(["notice"]) licenses(["notice"])
package(default_visibility = [ package(default_visibility = [
"//buzz/diffractor/mediapipe:__subpackages__",
"//mediapipe/examples:__subpackages__", "//mediapipe/examples:__subpackages__",
"//mediapipe/viz:__subpackages__", "//mediapipe/viz:__subpackages__",
"//mediapipe/web/solutions:__subpackages__",
]) ])
cc_library( cc_library(

View File

@ -43,6 +43,9 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kDetectedBordersTag[] = "DETECTED_BORDERS";
constexpr char kVideoTag[] = "VIDEO";
const char kConfig[] = R"( const char kConfig[] = R"(
calculator: "BorderDetectionCalculator" calculator: "BorderDetectionCalculator"
input_stream: "VIDEO:camera_frames" input_stream: "VIDEO:camera_frames"
@ -81,14 +84,14 @@ TEST(BorderDetectionCalculatorTest, NoBorderTest) {
ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight); ImageFormat::SRGB, kTestFrameWidth, kTestFrameHeight);
cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get()); cv::Mat input_mat = mediapipe::formats::MatView(input_frame.get());
input_mat.setTo(cv::Scalar(0, 0, 0)); input_mat.setTo(cv::Scalar(0, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(0, static_features.border().size()); ASSERT_EQ(0, static_features.border().size());
@ -115,14 +118,14 @@ TEST(BorderDetectionCalculatorTest, TopBorderTest) {
cv::Mat sub_image = cv::Mat sub_image =
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight)); input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
sub_image.setTo(cv::Scalar(255, 0, 0)); sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(1, static_features.border().size()); ASSERT_EQ(1, static_features.border().size());
@ -155,14 +158,14 @@ TEST(BorderDetectionCalculatorTest, TopBorderPadTest) {
cv::Mat sub_image = cv::Mat sub_image =
input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight)); input_mat(cv::Rect(0, 0, kTestFrameWidth, kTopBorderHeight));
sub_image.setTo(cv::Scalar(255, 0, 0)); sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(1, static_features.border().size()); ASSERT_EQ(1, static_features.border().size());
@ -197,14 +200,14 @@ TEST(BorderDetectionCalculatorTest, BottomBorderTest) {
input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight, input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
kTestFrameWidth, kBottomBorderHeight)); kTestFrameWidth, kBottomBorderHeight));
bottom_image.setTo(cv::Scalar(255, 0, 0)); bottom_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(1, static_features.border().size()); ASSERT_EQ(1, static_features.border().size());
@ -238,14 +241,14 @@ TEST(BorderDetectionCalculatorTest, TopBottomBorderTest) {
input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight, input_mat(cv::Rect(0, kTestFrameHeight - kBottomBorderHeight,
kTestFrameWidth, kBottomBorderHeight)); kTestFrameWidth, kBottomBorderHeight));
bottom_image.setTo(cv::Scalar(255, 0, 0)); bottom_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(2, static_features.border().size()); ASSERT_EQ(2, static_features.border().size());
@ -291,14 +294,14 @@ TEST(BorderDetectionCalculatorTest, TopBottomBorderTestAspect2) {
input_mat(cv::Rect(0, kTestFrameHeightTall - kBottomBorderHeight, input_mat(cv::Rect(0, kTestFrameHeightTall - kBottomBorderHeight,
kTestFrameWidthTall, kBottomBorderHeight)); kTestFrameWidthTall, kBottomBorderHeight));
bottom_image.setTo(cv::Scalar(255, 0, 0)); bottom_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(2, static_features.border().size()); ASSERT_EQ(2, static_features.border().size());
@ -352,14 +355,14 @@ TEST(BorderDetectionCalculatorTest, DominantColor) {
input_mat(cv::Rect(0, 0, kTestFrameWidth / 2 + 50, kTestFrameHeight / 2)); input_mat(cv::Rect(0, 0, kTestFrameWidth / 2 + 50, kTestFrameHeight / 2));
sub_image.setTo(cv::Scalar(255, 0, 0)); sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("DETECTED_BORDERS").packets; runner->Outputs().Tag(kDetectedBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
ASSERT_EQ(0, static_features.border().size()); ASSERT_EQ(0, static_features.border().size());
@ -383,7 +386,7 @@ void BM_Large(benchmark::State& state) {
cv::Mat sub_image = cv::Mat sub_image =
input_mat(cv::Rect(0, 0, kTestFrameLargeWidth, kTopBorderHeight)); input_mat(cv::Rect(0, 0, kTestFrameLargeWidth, kTopBorderHeight));
sub_image.setTo(cv::Scalar(255, 0, 0)); sub_image.setTo(cv::Scalar(255, 0, 0));
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
// Run the calculator. // Run the calculator.

View File

@ -31,7 +31,11 @@ constexpr char kVideoSize[] = "VIDEO_SIZE";
constexpr char kSalientRegions[] = "SALIENT_REGIONS"; constexpr char kSalientRegions[] = "SALIENT_REGIONS";
constexpr char kDetections[] = "DETECTIONS"; constexpr char kDetections[] = "DETECTIONS";
constexpr char kDetectedBorders[] = "BORDERS"; constexpr char kDetectedBorders[] = "BORDERS";
// Crop location as abs rect discretized.
constexpr char kCropRect[] = "CROP_RECT"; constexpr char kCropRect[] = "CROP_RECT";
// Crop location as normalized rect.
constexpr char kNormalizedCropRect[] = "NORMALIZED_CROP_RECT";
// Crop location without position smoothing.
constexpr char kFirstCropRect[] = "FIRST_CROP_RECT"; constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
// Can be used to control whether an animated zoom should actually performed // Can be used to control whether an animated zoom should actually performed
// (configured through option us_to_first_rect). If provided, a non-zero integer // (configured through option us_to_first_rect). If provided, a non-zero integer
@ -51,6 +55,8 @@ constexpr float kFieldOfView = 60;
// Used to save state on Close and load state on Open in a new graph. // Used to save state on Close and load state on Open in a new graph.
// Can be used to preserve state between graphs. // Can be used to preserve state between graphs.
constexpr char kStateCache[] = "STATE_CACHE"; constexpr char kStateCache[] = "STATE_CACHE";
// Tolerance for zooming out recentering.
constexpr float kPixelTolerance = 3;
namespace mediapipe { namespace mediapipe {
namespace autoflip { namespace autoflip {
@ -166,6 +172,9 @@ absl::Status ContentZoomingCalculator::GetContract(
if (cc->Outputs().HasTag(kCropRect)) { if (cc->Outputs().HasTag(kCropRect)) {
cc->Outputs().Tag(kCropRect).Set<mediapipe::Rect>(); cc->Outputs().Tag(kCropRect).Set<mediapipe::Rect>();
} }
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
cc->Outputs().Tag(kNormalizedCropRect).Set<mediapipe::NormalizedRect>();
}
if (cc->Outputs().HasTag(kFirstCropRect)) { if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs().Tag(kFirstCropRect).Set<mediapipe::NormalizedRect>(); cc->Outputs().Tag(kFirstCropRect).Set<mediapipe::NormalizedRect>();
} }
@ -553,6 +562,16 @@ absl::Status ContentZoomingCalculator::Process(
cc->Outputs().Tag(kCropRect).Add(default_rect.release(), cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
Timestamp(cc->InputTimestamp())); Timestamp(cc->InputTimestamp()));
} }
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
default_rect->set_x_center(0.5);
default_rect->set_y_center(0.5);
default_rect->set_width(1.0);
default_rect->set_height(1.0);
cc->Outputs()
.Tag(kNormalizedCropRect)
.Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
}
// Also provide a first crop rect: in this case a zero-sized one. // Also provide a first crop rect: in this case a zero-sized one.
if (cc->Outputs().HasTag(kFirstCropRect)) { if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs() cc->Outputs()
@ -634,9 +653,9 @@ absl::Status ContentZoomingCalculator::Process(
// Compute smoothed zoom camera path. // Compute smoothed zoom camera path.
MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation( MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation(
height, cc->InputTimestamp().Microseconds())); height, cc->InputTimestamp().Microseconds()));
int path_height; float path_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height)); MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height));
int path_width = path_height * target_aspect_; float path_width = path_height * target_aspect_;
// Update pixel-per-degree value for pan/tilt. // Update pixel-per-degree value for pan/tilt.
int target_height; int target_height;
@ -652,11 +671,48 @@ absl::Status ContentZoomingCalculator::Process(
offset_x, cc->InputTimestamp().Microseconds())); offset_x, cc->InputTimestamp().Microseconds()));
MP_RETURN_IF_ERROR(path_solver_tilt_->AddObservation( MP_RETURN_IF_ERROR(path_solver_tilt_->AddObservation(
offset_y, cc->InputTimestamp().Microseconds())); offset_y, cc->InputTimestamp().Microseconds()));
int path_offset_x; float path_offset_x;
MP_RETURN_IF_ERROR(path_solver_pan_->GetState(&path_offset_x)); MP_RETURN_IF_ERROR(path_solver_pan_->GetState(&path_offset_x));
int path_offset_y; float path_offset_y;
MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y)); MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y));
float delta_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
int delta_width = delta_height * target_aspect_;
// Smooth centering when zooming out.
float remaining_width = target_width - path_width;
int width_space = frame_width_ - target_width;
if (abs(path_offset_x - frame_width_ / 2) >
width_space / 2 + kPixelTolerance &&
remaining_width > kPixelTolerance) {
float required_width =
abs(path_offset_x - frame_width_ / 2) - width_space / 2;
if (path_offset_x < frame_width_ / 2) {
path_offset_x += delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
} else {
path_offset_x -= delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
}
}
float remaining_height = target_height - path_height;
int height_space = frame_height_ - target_height;
if (abs(path_offset_y - frame_height_ / 2) >
height_space / 2 + kPixelTolerance &&
remaining_height > kPixelTolerance) {
float required_height =
abs(path_offset_y - frame_height_ / 2) - height_space / 2;
if (path_offset_y < frame_height_ / 2) {
path_offset_y += delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
} else {
path_offset_y -= delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
}
}
// Prevent box from extending beyond the image after camera smoothing. // Prevent box from extending beyond the image after camera smoothing.
if (path_offset_y - ceil(path_height / 2.0) < 0) { if (path_offset_y - ceil(path_height / 2.0) < 0) {
path_offset_y = ceil(path_height / 2.0); path_offset_y = ceil(path_height / 2.0);
@ -705,7 +761,7 @@ absl::Status ContentZoomingCalculator::Process(
is_animating = IsAnimatingToFirstRect(cc->InputTimestamp()); is_animating = IsAnimatingToFirstRect(cc->InputTimestamp());
} }
// Transmit downstream to glcroppingcalculator. // Transmit downstream to glcroppingcalculator in discrete int values.
if (cc->Outputs().HasTag(kCropRect)) { if (cc->Outputs().HasTag(kCropRect)) {
std::unique_ptr<mediapipe::Rect> gpu_rect; std::unique_ptr<mediapipe::Rect> gpu_rect;
if (is_animating) { if (is_animating) {
@ -716,13 +772,36 @@ absl::Status ContentZoomingCalculator::Process(
} else { } else {
gpu_rect = absl::make_unique<mediapipe::Rect>(); gpu_rect = absl::make_unique<mediapipe::Rect>();
gpu_rect->set_x_center(path_offset_x); gpu_rect->set_x_center(path_offset_x);
gpu_rect->set_width(path_height * target_aspect_); gpu_rect->set_width(path_width);
gpu_rect->set_y_center(path_offset_y); gpu_rect->set_y_center(path_offset_y);
gpu_rect->set_height(path_height); gpu_rect->set_height(path_height);
} }
cc->Outputs().Tag(kCropRect).Add(gpu_rect.release(), cc->Outputs().Tag(kCropRect).Add(gpu_rect.release(),
Timestamp(cc->InputTimestamp())); Timestamp(cc->InputTimestamp()));
} }
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
std::unique_ptr<mediapipe::NormalizedRect> gpu_rect =
absl::make_unique<mediapipe::NormalizedRect>();
float float_frame_width = static_cast<float>(frame_width_);
float float_frame_height = static_cast<float>(frame_height_);
if (is_animating) {
auto rect =
GetAnimationRect(frame_width, frame_height, cc->InputTimestamp());
MP_RETURN_IF_ERROR(rect.status());
gpu_rect->set_x_center(rect->x_center() / float_frame_width);
gpu_rect->set_width(rect->width() / float_frame_width);
gpu_rect->set_y_center(rect->y_center() / float_frame_height);
gpu_rect->set_height(rect->height() / float_frame_height);
} else {
gpu_rect->set_x_center(path_offset_x / float_frame_width);
gpu_rect->set_width(path_width / float_frame_width);
gpu_rect->set_y_center(path_offset_y / float_frame_height);
gpu_rect->set_height(path_height / float_frame_height);
}
cc->Outputs()
.Tag(kNormalizedCropRect)
.Add(gpu_rect.release(), Timestamp(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kFirstCropRect)) { if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs() cc->Outputs()

View File

@ -38,6 +38,17 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kFirstCropRectTag[] = "FIRST_CROP_RECT";
constexpr char kStateCacheTag[] = "STATE_CACHE";
constexpr char kCropRectTag[] = "CROP_RECT";
constexpr char kBordersTag[] = "BORDERS";
constexpr char kSalientRegionsTag[] = "SALIENT_REGIONS";
constexpr char kVideoTag[] = "VIDEO";
constexpr char kMaxZoomFactorPctTag[] = "MAX_ZOOM_FACTOR_PCT";
constexpr char kAnimateZoomTag[] = "ANIMATE_ZOOM";
constexpr char kVideoSizeTag[] = "VIDEO_SIZE";
constexpr char kDetectionsTag[] = "DETECTIONS";
const char kConfigA[] = R"( const char kConfigA[] = R"(
calculator: "ContentZoomingCalculator" calculator: "ContentZoomingCalculator"
input_stream: "VIDEO:camera_frames" input_stream: "VIDEO:camera_frames"
@ -48,12 +59,15 @@ const char kConfigA[] = R"(
max_zoom_value_deg: 0 max_zoom_value_deg: 0
kinematic_options_zoom { kinematic_options_zoom {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_tilt { kinematic_options_tilt {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_pan { kinematic_options_pan {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
} }
} }
@ -73,12 +87,15 @@ const char kConfigB[] = R"(
max_zoom_value_deg: 0 max_zoom_value_deg: 0
kinematic_options_zoom { kinematic_options_zoom {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_tilt { kinematic_options_tilt {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_pan { kinematic_options_pan {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
} }
} }
@ -94,12 +111,15 @@ const char kConfigC[] = R"(
max_zoom_value_deg: 0 max_zoom_value_deg: 0
kinematic_options_zoom { kinematic_options_zoom {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_tilt { kinematic_options_tilt {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_pan { kinematic_options_pan {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
} }
} }
@ -111,17 +131,21 @@ const char kConfigD[] = R"(
input_stream: "DETECTIONS:detections" input_stream: "DETECTIONS:detections"
output_stream: "CROP_RECT:rect" output_stream: "CROP_RECT:rect"
output_stream: "FIRST_CROP_RECT:first_rect" output_stream: "FIRST_CROP_RECT:first_rect"
output_stream: "NORMALIZED_CROP_RECT:float_rect"
options: { options: {
[mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: { [mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: {
max_zoom_value_deg: 0 max_zoom_value_deg: 0
kinematic_options_zoom { kinematic_options_zoom {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_tilt { kinematic_options_tilt {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_pan { kinematic_options_pan {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
} }
} }
@ -139,12 +163,15 @@ const char kConfigE[] = R"(
max_zoom_value_deg: 0 max_zoom_value_deg: 0
kinematic_options_zoom { kinematic_options_zoom {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_tilt { kinematic_options_tilt {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_pan { kinematic_options_pan {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
} }
} }
@ -162,12 +189,15 @@ const char kConfigF[] = R"(
max_zoom_value_deg: 0 max_zoom_value_deg: 0
kinematic_options_zoom { kinematic_options_zoom {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_tilt { kinematic_options_tilt {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
kinematic_options_pan { kinematic_options_pan {
min_motion_to_reframe: 1.2 min_motion_to_reframe: 1.2
max_velocity: 18
} }
} }
} }
@ -220,17 +250,17 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
detections->push_back(detection); detections->push_back(detection);
} }
runner->MutableInputs() runner->MutableInputs()
->Tag("DETECTIONS") ->Tag(kDetectionsTag)
.packets.push_back(Adopt(detections.release()).At(Timestamp(time))); .packets.push_back(Adopt(detections.release()).At(Timestamp(time)));
auto input_size = ::absl::make_unique<std::pair<int, int>>(width, height); auto input_size = ::absl::make_unique<std::pair<int, int>>(width, height);
runner->MutableInputs() runner->MutableInputs()
->Tag("VIDEO_SIZE") ->Tag(kVideoSizeTag)
.packets.push_back(Adopt(input_size.release()).At(Timestamp(time))); .packets.push_back(Adopt(input_size.release()).At(Timestamp(time)));
if (flags.animated_zoom.has_value()) { if (flags.animated_zoom.has_value()) {
runner->MutableInputs() runner->MutableInputs()
->Tag("ANIMATE_ZOOM") ->Tag(kAnimateZoomTag)
.packets.push_back( .packets.push_back(
mediapipe::MakePacket<bool>(flags.animated_zoom.value()) mediapipe::MakePacket<bool>(flags.animated_zoom.value())
.At(Timestamp(time))); .At(Timestamp(time)));
@ -238,7 +268,7 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
if (flags.max_zoom_factor_percent.has_value()) { if (flags.max_zoom_factor_percent.has_value()) {
runner->MutableInputs() runner->MutableInputs()
->Tag("MAX_ZOOM_FACTOR_PCT") ->Tag(kMaxZoomFactorPctTag)
.packets.push_back( .packets.push_back(
mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value()) mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value())
.At(Timestamp(time))); .At(Timestamp(time)));
@ -250,6 +280,21 @@ void AddDetection(const cv::Rect_<float>& position, const int64 time,
AddDetectionFrameSize(position, time, 1000, 1000, runner); AddDetectionFrameSize(position, time, 1000, 1000, runner);
} }
void CheckCropRectFloats(const float x_center, const float y_center,
const float width, const float height,
const int frame_number,
const CalculatorRunner::StreamContentsSet& output) {
ASSERT_GT(output.Tag("NORMALIZED_CROP_RECT").packets.size(), frame_number);
auto float_rect = output.Tag("NORMALIZED_CROP_RECT")
.packets[frame_number]
.Get<mediapipe::NormalizedRect>();
EXPECT_FLOAT_EQ(float_rect.x_center(), x_center);
EXPECT_FLOAT_EQ(float_rect.y_center(), y_center);
EXPECT_FLOAT_EQ(float_rect.width(), width);
EXPECT_FLOAT_EQ(float_rect.height(), height);
}
void CheckCropRect(const int x_center, const int y_center, const int width, void CheckCropRect(const int x_center, const int y_center, const int width,
const int height, const int frame_number, const int height, const int frame_number,
const std::vector<Packet>& output_packets) { const std::vector<Packet>& output_packets) {
@ -274,21 +319,21 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp(0))); Adopt(input_frame.release()).At(Timestamp(0)));
runner->MutableInputs() runner->MutableInputs()
->Tag("SALIENT_REGIONS") ->Tag(kSalientRegionsTag)
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0))); .packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("BORDERS").packets; runner->Outputs().Tag(kBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
CheckBorder(static_features, 1000, 1000, 495, 395); CheckBorder(static_features, 1000, 1000, 494, 394);
} }
TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) { TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
@ -297,7 +342,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get()); AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 111, 111, 0, CheckCropRect(450, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, PanConfig) { TEST(ContentZoomingCalculatorTest, PanConfig) {
@ -313,9 +358,9 @@ TEST(ContentZoomingCalculatorTest, PanConfig) {
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 111, 111, 0, CheckCropRect(450, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(483, 550, 111, 111, 1, CheckCropRect(483, 550, 111, 111, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, PanConfigWithCache) { TEST(ContentZoomingCalculatorTest, PanConfigWithCache) {
@ -330,31 +375,31 @@ TEST(ContentZoomingCalculatorTest, PanConfigWithCache) {
options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(50.0); options->mutable_kinematic_options_zoom()->set_min_motion_to_reframe(50.0);
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get()); AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 111, 111, 0, CheckCropRect(450, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(483, 550, 111, 111, 0, CheckCropRect(483, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
// Now repeat the last frame for a new runner without the cache to see a reset // Now repeat the last frame for a new runner without the cache to see a reset
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost. CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost.
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
} }
@ -371,9 +416,9 @@ TEST(ContentZoomingCalculatorTest, TiltConfig) {
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 111, 111, 0, CheckCropRect(450, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(450, 583, 111, 111, 1, CheckCropRect(450, 583, 111, 111, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ZoomConfig) { TEST(ContentZoomingCalculatorTest, ZoomConfig) {
@ -389,9 +434,9 @@ TEST(ContentZoomingCalculatorTest, ZoomConfig) {
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 111, 111, 0, CheckCropRect(450, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(450, 550, 139, 139, 1, CheckCropRect(450, 550, 138, 138, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) { TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
@ -406,31 +451,31 @@ TEST(ContentZoomingCalculatorTest, ZoomConfigWithCache) {
options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2); options->mutable_kinematic_options_zoom()->set_update_rate_seconds(2);
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get()); AddDetection(cv::Rect_<float>(.4, .5, .1, .1), 0, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 111, 111, 0, CheckCropRect(450, 550, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(450, 550, 139, 139, 0, CheckCropRect(450, 550, 138, 138, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
// Now repeat the last frame for a new runner without the cache to see a reset // Now repeat the last frame for a new runner without the cache to see a reset
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(nullptr);
AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get()); AddDetection(cv::Rect_<float>(.45, .55, .15, .15), 2000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost. CheckCropRect(525, 625, 166, 166, 0, // Without a cache, state was lost.
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
} }
@ -448,18 +493,18 @@ TEST(ContentZoomingCalculatorTest, MinAspectBorderValues) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp(0))); Adopt(input_frame.release()).At(Timestamp(0)));
runner->MutableInputs() runner->MutableInputs()
->Tag("SALIENT_REGIONS") ->Tag(kSalientRegionsTag)
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0))); .packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("BORDERS").packets; runner->Outputs().Tag(kBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
CheckBorder(static_features, 1000, 1000, 250, 250); CheckBorder(static_features, 1000, 1000, 250, 250);
@ -485,18 +530,18 @@ TEST(ContentZoomingCalculatorTest, TwoFacesWide) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp(0))); Adopt(input_frame.release()).At(Timestamp(0)));
runner->MutableInputs() runner->MutableInputs()
->Tag("SALIENT_REGIONS") ->Tag(kSalientRegionsTag)
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0))); .packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("BORDERS").packets; runner->Outputs().Tag(kBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
@ -510,18 +555,18 @@ TEST(ContentZoomingCalculatorTest, NoDetectionOnInit) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1000, 1000);
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp(0))); Adopt(input_frame.release()).At(Timestamp(0)));
runner->MutableInputs() runner->MutableInputs()
->Tag("SALIENT_REGIONS") ->Tag(kSalientRegionsTag)
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0))); .packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("BORDERS").packets; runner->Outputs().Tag(kBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
@ -542,21 +587,21 @@ TEST(ContentZoomingCalculatorTest, ZoomTestPairSize) {
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000); auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
runner->MutableInputs() runner->MutableInputs()
->Tag("VIDEO_SIZE") ->Tag(kVideoSizeTag)
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0))); .packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
runner->MutableInputs() runner->MutableInputs()
->Tag("SALIENT_REGIONS") ->Tag(kSalientRegionsTag)
.packets.push_back(Adopt(detection_set.release()).At(Timestamp(0))); .packets.push_back(Adopt(detection_set.release()).At(Timestamp(0)));
// Run the calculator. // Run the calculator.
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("BORDERS").packets; runner->Outputs().Tag(kBordersTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& static_features = output_packets[0].Get<StaticFeatures>(); const auto& static_features = output_packets[0].Get<StaticFeatures>();
CheckBorder(static_features, 1000, 1000, 495, 395); CheckBorder(static_features, 1000, 1000, 494, 394);
} }
TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) { TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
@ -571,9 +616,9 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) {
AddDetection(cv::Rect_<float>(.9, .9, .1, .1), 1000000, runner.get()); AddDetection(cv::Rect_<float>(.9, .9, .1, .1), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(972, 972, 55, 55, 0, CheckCropRect(972, 972, 55, 55, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(958, 958, 83, 83, 1, CheckCropRect(944, 944, 83, 83, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) { TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
@ -587,8 +632,8 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) {
AddDetection(cv::Rect_<float>(0, 0, .05, .05), 0, runner.get()); AddDetection(cv::Rect_<float>(0, 0, .05, .05), 0, runner.get());
AddDetection(cv::Rect_<float>(0, 0, .1, .1), 1000000, runner.get()); AddDetection(cv::Rect_<float>(0, 0, .1, .1), 1000000, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag("CROP_RECT").packets); CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(42, 42, 83, 83, 1, runner->Outputs().Tag("CROP_RECT").packets); CheckCropRect(56, 56, 83, 83, 1, runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, VerticalShift) { TEST(ContentZoomingCalculatorTest, VerticalShift) {
@ -601,7 +646,9 @@ TEST(ContentZoomingCalculatorTest, VerticalShift) {
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
// 1000px * .1 offset + 1000*.1*.1 shift = 170 // 1000px * .1 offset + 1000*.1*.1 shift = 170
CheckCropRect(150, 170, 111, 111, 0, CheckCropRect(150, 170, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRectFloats(150 / 1000.0, 170 / 1000.0, 111 / 1000.0, 111 / 1000.0, 0,
runner->Outputs());
} }
TEST(ContentZoomingCalculatorTest, HorizontalShift) { TEST(ContentZoomingCalculatorTest, HorizontalShift) {
@ -614,7 +661,9 @@ TEST(ContentZoomingCalculatorTest, HorizontalShift) {
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
// 1000px * .1 offset + 1000*.1*.1 shift = 170 // 1000px * .1 offset + 1000*.1*.1 shift = 170
CheckCropRect(170, 150, 111, 111, 0, CheckCropRect(170, 150, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRectFloats(170 / 1000.0, 150 / 1000.0, 111 / 1000.0, 111 / 1000.0, 0,
runner->Outputs());
} }
TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) { TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
@ -627,14 +676,14 @@ TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) {
AddDetection(cv::Rect_<float>(.9, 0, .1, .1), 0, runner.get()); AddDetection(cv::Rect_<float>(.9, 0, .1, .1), 0, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(944, 56, 111, 111, 0, CheckCropRect(944, 56, 111, 111, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, EmptySize) { TEST(ContentZoomingCalculatorTest, EmptySize) {
auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD); auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
ASSERT_EQ(runner->Outputs().Tag("CROP_RECT").packets.size(), 0); ASSERT_EQ(runner->Outputs().Tag(kCropRectTag).packets.size(), 0);
} }
TEST(ContentZoomingCalculatorTest, EmptyDetections) { TEST(ContentZoomingCalculatorTest, EmptyDetections) {
@ -642,11 +691,11 @@ TEST(ContentZoomingCalculatorTest, EmptyDetections) {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000); auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
runner->MutableInputs() runner->MutableInputs()
->Tag("VIDEO_SIZE") ->Tag(kVideoSizeTag)
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0))); .packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 1000, 1000, 0, CheckCropRect(500, 500, 1000, 1000, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) { TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
@ -658,9 +707,9 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeStationary) {
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 222, 222, 0, CheckCropRect(500, 500, 222, 222, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 1, CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) { TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
@ -669,23 +718,23 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeStationaryWithCache) {
config.add_input_side_packet("STATE_CACHE:state_cache"); config.add_input_side_packet("STATE_CACHE:state_cache");
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 1000, 1000, AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 1000, 1000,
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 222, 222, 0, CheckCropRect(500, 500, 222, 222, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1, 500, 500, AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1, 500, 500,
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 0, CheckCropRect(500 * 0.5, 500 * 0.5, 222 * 0.5, 222 * 0.5, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
} }
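
These two runs share a ContentZoomingCalculatorStateCacheType pointer through the STATE_CACHE input side packet, so the second runner resumes from the zoom state left by the first one instead of starting zoomed out again. A toy, framework-free sketch of that pattern (ZoomStateCache and RunOnce are invented for illustration; the real cache type and smoothing logic are not shown in this diff):

#include <iostream>
#include <optional>

// Toy stand-in for the externally owned state cache that outlives any run.
struct ZoomStateCache {
  std::optional<double> crop_size;  // last crop size, if any run set it
};

// Each "run" reads the cache (resuming if possible) and writes it back,
// much like the second CalculatorRunner above continuing from the first.
double RunOnce(ZoomStateCache* cache, double target_crop) {
  double start = cache->crop_size.value_or(1000.0);  // default: zoomed out
  double result = (start + target_crop) / 2;         // pretend smoothing step
  cache->crop_size = result;
  return result;
}

int main() {
  ZoomStateCache cache;
  std::cout << RunOnce(&cache, 222) << "\n";  // first run starts from 1000
  std::cout << RunOnce(&cache, 222) << "\n";  // second run resumes from cache
}
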
@ -700,11 +749,11 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZooming) {
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 888, 888, 0, CheckCropRect(500, 500, 888, 888, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 588, 588, 1, CheckCropRect(500, 500, 588, 588, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 2, CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) { TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
@ -713,18 +762,18 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
config.add_input_side_packet("STATE_CACHE:state_cache"); config.add_input_side_packet("STATE_CACHE:state_cache");
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetectionFrameSize(cv::Rect_<float>(.1, .1, .8, .8), 0, 1000, 1000, AddDetectionFrameSize(cv::Rect_<float>(.1, .1, .8, .8), 0, 1000, 1000,
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 888, 888, 0, CheckCropRect(500, 500, 888, 888, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
// The second runner should just resume based on state from the first runner. // The second runner should just resume based on state from the first runner.
{ {
auto runner = ::absl::make_unique<CalculatorRunner>(config); auto runner = ::absl::make_unique<CalculatorRunner>(config);
runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket< runner->MutableSidePackets()->Tag(kStateCacheTag) = MakePacket<
mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache); mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000, AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
runner.get()); runner.get());
@ -732,9 +781,9 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 588, 588, 0, CheckCropRect(500, 500, 588, 588, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 1, CheckCropRect(500 * 0.5, 500 * 0.5, 288 * 0.5, 288 * 0.5, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
} }
@ -749,7 +798,7 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) {
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
// 55/60 * 1000 = 916 // 55/60 * 1000 = 916
CheckCropRect(500, 500, 916, 916, 0, CheckCropRect(500, 500, 916, 916, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) { TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
@ -772,11 +821,11 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
// Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360 // Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360
// Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430 // Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430
CheckCropRect(320, 240, 480, 360, 0, CheckCropRect(320, 240, 480, 360, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(640, 360, 769, 433, 2, CheckCropRect(640, 360, 769, 433, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(320, 240, 480, 360, 3, CheckCropRect(320, 240, 480, 360, 3,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
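
A worked version of the percentage arithmetic in the comments above: a maximum zoom of N% leaves roughly height * 100 / N pixels of crop height. The 480px and 720px frame heights used below are assumptions inferred from the crop centers (320, 240) and (640, 360), and MinCropHeight is just an illustrative helper:

#include <cstdio>

// Remaining crop height when zoom-in is capped at max_zoom_pct percent.
int MinCropHeight(int frame_height, double max_zoom_pct) {
  return static_cast<int>(frame_height * 100.0 / max_zoom_pct);
}

int main() {
  // 133% max zoom on a 480px-high frame leaves ~360px of height.
  std::printf("%d\n", MinCropHeight(480, 133.0));
  // 166% max zoom on a 720px-high frame leaves ~433px of height.
  std::printf("%d\n", MinCropHeight(720, 166.0));
}
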
TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) { TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
@ -795,9 +844,9 @@ TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
// 55/60 * 1000 = 916 // 55/60 * 1000 = 916
CheckCropRect(500, 500, 950, 950, 0, CheckCropRect(500, 500, 950, 950, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 1000, 1000, 2, CheckCropRect(500, 500, 1000, 1000, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, StartZoomedOut) { TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
@ -816,13 +865,13 @@ TEST(ContentZoomingCalculatorTest, StartZoomedOut) {
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 1000, 1000, 0, CheckCropRect(500, 500, 1000, 1000, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 880, 880, 1, CheckCropRect(500, 500, 880, 880, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 760, 760, 2, CheckCropRect(500, 500, 760, 760, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 655, 655, 3, CheckCropRect(500, 500, 655, 655, 3,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) { TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
@ -844,15 +893,15 @@ TEST(ContentZoomingCalculatorTest, AnimateToFirstRect) {
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 1000, 1000, 0, CheckCropRect(500, 500, 1000, 1000, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 1000, 1000, 1, CheckCropRect(500, 500, 1000, 1000, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 470, 470, 2, CheckCropRect(500, 500, 470, 470, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 222, 222, 3, CheckCropRect(500, 500, 222, 222, 3,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 222, 222, 4, CheckCropRect(500, 500, 222, 222, 4,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, CanControlAnimation) { TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
@ -879,15 +928,15 @@ TEST(ContentZoomingCalculatorTest, CanControlAnimation) {
runner.get(), {.animated_zoom = false}); runner.get(), {.animated_zoom = false});
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 1000, 1000, 0, CheckCropRect(500, 500, 1000, 1000, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 1000, 1000, 1, CheckCropRect(500, 500, 1000, 1000, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 470, 470, 2, CheckCropRect(500, 500, 470, 470, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 222, 222, 3, CheckCropRect(500, 500, 222, 222, 3,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 222, 222, 4, CheckCropRect(500, 500, 222, 222, 4,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) { TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
@ -907,11 +956,11 @@ TEST(ContentZoomingCalculatorTest, DoesNotAnimateIfDisabledViaInput) {
runner.get(), {.animated_zoom = false}); runner.get(), {.animated_zoom = false});
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckCropRect(500, 500, 1000, 1000, 0, CheckCropRect(500, 500, 1000, 1000, 0,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 880, 880, 1, CheckCropRect(500, 500, 880, 880, 1,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
CheckCropRect(500, 500, 760, 760, 2, CheckCropRect(500, 500, 760, 760, 2,
runner->Outputs().Tag("CROP_RECT").packets); runner->Outputs().Tag(kCropRectTag).packets);
} }
TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) { TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
@ -920,13 +969,13 @@ TEST(ContentZoomingCalculatorTest, ProvidesZeroSizeFirstRectWithoutDetections) {
auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000); auto input_size = ::absl::make_unique<std::pair<int, int>>(1000, 1000);
runner->MutableInputs() runner->MutableInputs()
->Tag("VIDEO_SIZE") ->Tag(kVideoSizeTag)
.packets.push_back(Adopt(input_size.release()).At(Timestamp(0))); .packets.push_back(Adopt(input_size.release()).At(Timestamp(0)));
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("FIRST_CROP_RECT").packets; runner->Outputs().Tag(kFirstCropRectTag).packets;
ASSERT_EQ(output_packets.size(), 1); ASSERT_EQ(output_packets.size(), 1);
const auto& rect = output_packets[0].Get<mediapipe::NormalizedRect>(); const auto& rect = output_packets[0].Get<mediapipe::NormalizedRect>();
EXPECT_EQ(rect.x_center(), 0); EXPECT_EQ(rect.x_center(), 0);
@ -951,7 +1000,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) {
runner.get()); runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("FIRST_CROP_RECT").packets; runner->Outputs().Tag(kFirstCropRectTag).packets;
ASSERT_EQ(output_packets.size(), 4); ASSERT_EQ(output_packets.size(), 4);
const auto& first_rect = output_packets[0].Get<mediapipe::NormalizedRect>(); const auto& first_rect = output_packets[0].Get<mediapipe::NormalizedRect>();
EXPECT_NEAR(first_rect.x_center(), 0.5, 0.05); EXPECT_NEAR(first_rect.x_center(), 0.5, 0.05);

View File

@ -64,7 +64,7 @@ message FaceBoxAdjusterCalculatorOptions {
// Max value of head motion (max of current or history) to be considered still // Max value of head motion (max of current or history) to be considered still
// stable. // stable.
optional float head_motion_threshold = 14 [default = 10.0]; optional float head_motion_threshold = 14 [default = 360.0];
// The max amount of time to use an old eye distance when the face look angle // The max amount of time to use an old eye distance when the face look angle
// is unstable. // is unstable.

View File

@ -32,6 +32,10 @@
namespace mediapipe { namespace mediapipe {
namespace autoflip { namespace autoflip {
constexpr char kRegionsTag[] = "REGIONS";
constexpr char kFacesTag[] = "FACES";
constexpr char kVideoTag[] = "VIDEO";
// This calculator converts detected faces to SalientRegion protos that can be // This calculator converts detected faces to SalientRegion protos that can be
// used for downstream processing. Each SalientRegion is scored using image // used for downstream processing. Each SalientRegion is scored using image
// cues. Scoring can be controlled through // cues. Scoring can be controlled through
@ -80,17 +84,17 @@ FaceToRegionCalculator::FaceToRegionCalculator() {}
absl::Status FaceToRegionCalculator::GetContract( absl::Status FaceToRegionCalculator::GetContract(
mediapipe::CalculatorContract* cc) { mediapipe::CalculatorContract* cc) {
if (cc->Inputs().HasTag("VIDEO")) { if (cc->Inputs().HasTag(kVideoTag)) {
cc->Inputs().Tag("VIDEO").Set<ImageFrame>(); cc->Inputs().Tag(kVideoTag).Set<ImageFrame>();
} }
cc->Inputs().Tag("FACES").Set<std::vector<mediapipe::Detection>>(); cc->Inputs().Tag(kFacesTag).Set<std::vector<mediapipe::Detection>>();
cc->Outputs().Tag("REGIONS").Set<DetectionSet>(); cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status FaceToRegionCalculator::Open(mediapipe::CalculatorContext* cc) { absl::Status FaceToRegionCalculator::Open(mediapipe::CalculatorContext* cc) {
options_ = cc->Options<FaceToRegionCalculatorOptions>(); options_ = cc->Options<FaceToRegionCalculatorOptions>();
if (!cc->Inputs().HasTag("VIDEO")) { if (!cc->Inputs().HasTag(kVideoTag)) {
RET_CHECK(!options_.use_visual_scorer()) RET_CHECK(!options_.use_visual_scorer())
<< "VIDEO input must be provided when using visual_scorer."; << "VIDEO input must be provided when using visual_scorer.";
RET_CHECK(!options_.export_individual_face_landmarks()) RET_CHECK(!options_.export_individual_face_landmarks())
@ -146,24 +150,24 @@ void FaceToRegionCalculator::ExtendSalientRegionWithPoint(
} }
absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) { absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) {
if (cc->Inputs().HasTag("VIDEO") && if (cc->Inputs().HasTag(kVideoTag) &&
cc->Inputs().Tag("VIDEO").Value().IsEmpty()) { cc->Inputs().Tag(kVideoTag).Value().IsEmpty()) {
return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
<< "No VIDEO input at time " << cc->InputTimestamp().Seconds(); << "No VIDEO input at time " << cc->InputTimestamp().Seconds();
} }
cv::Mat frame; cv::Mat frame;
if (cc->Inputs().HasTag("VIDEO")) { if (cc->Inputs().HasTag(kVideoTag)) {
frame = mediapipe::formats::MatView( frame = mediapipe::formats::MatView(
&cc->Inputs().Tag("VIDEO").Get<ImageFrame>()); &cc->Inputs().Tag(kVideoTag).Get<ImageFrame>());
frame_width_ = frame.cols; frame_width_ = frame.cols;
frame_height_ = frame.rows; frame_height_ = frame.rows;
} }
auto region_set = ::absl::make_unique<DetectionSet>(); auto region_set = ::absl::make_unique<DetectionSet>();
if (!cc->Inputs().Tag("FACES").Value().IsEmpty()) { if (!cc->Inputs().Tag(kFacesTag).Value().IsEmpty()) {
const auto& input_faces = const auto& input_faces =
cc->Inputs().Tag("FACES").Get<std::vector<mediapipe::Detection>>(); cc->Inputs().Tag(kFacesTag).Get<std::vector<mediapipe::Detection>>();
for (const auto& input_face : input_faces) { for (const auto& input_face : input_faces) {
RET_CHECK(input_face.location_data().format() == RET_CHECK(input_face.location_data().format() ==
@ -276,7 +280,9 @@ absl::Status FaceToRegionCalculator::Process(mediapipe::CalculatorContext* cc) {
} }
} }
} }
cc->Outputs().Tag("REGIONS").Add(region_set.release(), cc->InputTimestamp());
cc->Outputs()
.Tag(kRegionsTag)
.Add(region_set.release(), cc->InputTimestamp());
return absl::OkStatus(); return absl::OkStatus();
} }
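
The change repeated across these calculators replaces inline tag strings such as "VIDEO" with file-local constexpr char constants, so each tag name is spelled once and reused by GetContract, Open, and Process. A condensed sketch of the resulting contract, mirroring the lines above (the include paths are my best guess at the MediaPipe headers involved, and ExampleGetContract is not part of the real calculator):

#include <vector>

#include "absl/status/status.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image_frame.h"

namespace mediapipe {
namespace autoflip {

// File-local tag constants: the tag string is written once and reused.
constexpr char kVideoTag[] = "VIDEO";
constexpr char kFacesTag[] = "FACES";
constexpr char kRegionsTag[] = "REGIONS";

// Contract written against the constants, as in the diff above.
absl::Status ExampleGetContract(CalculatorContract* cc) {
  if (cc->Inputs().HasTag(kVideoTag)) {
    cc->Inputs().Tag(kVideoTag).Set<ImageFrame>();
  }
  cc->Inputs().Tag(kFacesTag).Set<std::vector<mediapipe::Detection>>();
  cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
  return absl::OkStatus();
}

}  // namespace autoflip
}  // namespace mediapipe

A misspelled constant fails to compile, whereas a misspelled string literal only fails at graph-validation or run time, which is presumably the point of the refactor.
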

View File

@ -33,6 +33,10 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kRegionsTag[] = "REGIONS";
constexpr char kFacesTag[] = "FACES";
constexpr char kVideoTag[] = "VIDEO";
const char kConfig[] = R"( const char kConfig[] = R"(
calculator: "FaceToRegionCalculator" calculator: "FaceToRegionCalculator"
input_stream: "VIDEO:frames" input_stream: "VIDEO:frames"
@ -100,7 +104,7 @@ void SetInputs(const std::vector<std::string>& faces, const bool include_video,
if (include_video) { if (include_video) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 800, 600); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 800, 600);
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp::PostStream())); Adopt(input_frame.release()).At(Timestamp::PostStream()));
} }
// Setup two faces as input. // Setup two faces as input.
@ -109,7 +113,7 @@ void SetInputs(const std::vector<std::string>& faces, const bool include_video,
for (const auto& face : faces) { for (const auto& face : faces) {
input_faces->push_back(ParseTextProtoOrDie<Detection>(face)); input_faces->push_back(ParseTextProtoOrDie<Detection>(face));
} }
runner->MutableInputs()->Tag("FACES").packets.push_back( runner->MutableInputs()->Tag(kFacesTag).packets.push_back(
Adopt(input_faces.release()).At(Timestamp::PostStream())); Adopt(input_faces.release()).At(Timestamp::PostStream()));
} }
@ -144,7 +148,7 @@ TEST(FaceToRegionCalculatorTest, FaceFullTypeSize) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
@ -177,7 +181,7 @@ TEST(FaceToRegionCalculatorTest, FaceLandmarksTypeSize) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
@ -208,7 +212,7 @@ TEST(FaceToRegionCalculatorTest, FaceLandmarksBox) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
@ -243,7 +247,7 @@ TEST(FaceToRegionCalculatorTest, FaceScore) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(1, regions.detections().size()); ASSERT_EQ(1, regions.detections().size());
@ -292,7 +296,7 @@ TEST(FaceToRegionCalculatorTest, FaceNoVideoPass) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();

View File

@ -52,6 +52,9 @@ LocalizationToRegionCalculator::LocalizationToRegionCalculator() {}
namespace { namespace {
constexpr char kRegionsTag[] = "REGIONS";
constexpr char kDetectionsTag[] = "DETECTIONS";
// Converts an object detection to a autoflip SignalType. Returns true if the // Converts an object detection to a autoflip SignalType. Returns true if the
// std::string label has a autoflip label. // std::string label has a autoflip label.
bool MatchType(const std::string& label, SignalType* type) { bool MatchType(const std::string& label, SignalType* type) {
@ -86,8 +89,8 @@ void FillSalientRegion(const mediapipe::Detection& detection,
absl::Status LocalizationToRegionCalculator::GetContract( absl::Status LocalizationToRegionCalculator::GetContract(
mediapipe::CalculatorContract* cc) { mediapipe::CalculatorContract* cc) {
cc->Inputs().Tag("DETECTIONS").Set<std::vector<mediapipe::Detection>>(); cc->Inputs().Tag(kDetectionsTag).Set<std::vector<mediapipe::Detection>>();
cc->Outputs().Tag("REGIONS").Set<DetectionSet>(); cc->Outputs().Tag(kRegionsTag).Set<DetectionSet>();
return absl::OkStatus(); return absl::OkStatus();
} }
@ -101,7 +104,7 @@ absl::Status LocalizationToRegionCalculator::Open(
absl::Status LocalizationToRegionCalculator::Process( absl::Status LocalizationToRegionCalculator::Process(
mediapipe::CalculatorContext* cc) { mediapipe::CalculatorContext* cc) {
const auto& annotations = const auto& annotations =
cc->Inputs().Tag("DETECTIONS").Get<std::vector<mediapipe::Detection>>(); cc->Inputs().Tag(kDetectionsTag).Get<std::vector<mediapipe::Detection>>();
auto regions = ::absl::make_unique<DetectionSet>(); auto regions = ::absl::make_unique<DetectionSet>();
for (const auto& detection : annotations) { for (const auto& detection : annotations) {
RET_CHECK_EQ(detection.label().size(), 1) RET_CHECK_EQ(detection.label().size(), 1)
@ -118,7 +121,7 @@ absl::Status LocalizationToRegionCalculator::Process(
} }
} }
cc->Outputs().Tag("REGIONS").Add(regions.release(), cc->InputTimestamp()); cc->Outputs().Tag(kRegionsTag).Add(regions.release(), cc->InputTimestamp());
return absl::OkStatus(); return absl::OkStatus();
} }

View File

@ -31,6 +31,9 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kRegionsTag[] = "REGIONS";
constexpr char kDetectionsTag[] = "DETECTIONS";
const char kConfig[] = R"( const char kConfig[] = R"(
calculator: "LocalizationToRegionCalculator" calculator: "LocalizationToRegionCalculator"
input_stream: "DETECTIONS:detections" input_stream: "DETECTIONS:detections"
@ -81,7 +84,7 @@ void SetInputs(CalculatorRunner* runner,
inputs->push_back(ParseTextProtoOrDie<Detection>(detection)); inputs->push_back(ParseTextProtoOrDie<Detection>(detection));
} }
runner->MutableInputs() runner->MutableInputs()
->Tag("DETECTIONS") ->Tag(kDetectionsTag)
.packets.push_back(Adopt(inputs.release()).At(Timestamp::PostStream())); .packets.push_back(Adopt(inputs.release()).At(Timestamp::PostStream()));
} }
@ -109,7 +112,7 @@ TEST(LocalizationToRegionCalculatorTest, StandardTypes) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(2, regions.detections().size()); ASSERT_EQ(2, regions.detections().size());
@ -137,7 +140,7 @@ TEST(LocalizationToRegionCalculatorTest, AllTypes) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(3, regions.detections().size()); ASSERT_EQ(3, regions.detections().size());
@ -153,7 +156,7 @@ TEST(LocalizationToRegionCalculatorTest, BothTypes) {
// Check the output regions. // Check the output regions.
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("REGIONS").packets; runner->Outputs().Tag(kRegionsTag).packets;
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
const auto& regions = output_packets[0].Get<DetectionSet>(); const auto& regions = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(5, regions.detections().size()); ASSERT_EQ(5, regions.detections().size());

View File

@ -34,6 +34,23 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kFramingDetectionsVizFramesTag[] =
"FRAMING_DETECTIONS_VIZ_FRAMES";
constexpr char kExternalRenderingFullVidTag[] = "EXTERNAL_RENDERING_FULL_VID";
constexpr char kExternalRenderingPerFrameTag[] = "EXTERNAL_RENDERING_PER_FRAME";
constexpr char kCroppingSummaryTag[] = "CROPPING_SUMMARY";
constexpr char kSalientPointFrameVizFramesTag[] =
"SALIENT_POINT_FRAME_VIZ_FRAMES";
constexpr char kKeyFrameCropRegionVizFramesTag[] =
"KEY_FRAME_CROP_REGION_VIZ_FRAMES";
constexpr char kCroppedFramesTag[] = "CROPPED_FRAMES";
constexpr char kShotBoundariesTag[] = "SHOT_BOUNDARIES";
constexpr char kStaticFeaturesTag[] = "STATIC_FEATURES";
constexpr char kVideoSizeTag[] = "VIDEO_SIZE";
constexpr char kVideoFramesTag[] = "VIDEO_FRAMES";
constexpr char kDetectionFeaturesTag[] = "DETECTION_FEATURES";
constexpr char kKeyFramesTag[] = "KEY_FRAMES";
using ::testing::HasSubstr; using ::testing::HasSubstr;
constexpr char kConfig[] = R"( constexpr char kConfig[] = R"(
@ -241,10 +258,10 @@ void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
const int key_frame_height, bool randomize, const int key_frame_height, bool randomize,
CalculatorRunner::StreamContentsSet* inputs) { CalculatorRunner::StreamContentsSet* inputs) {
Timestamp timestamp(time_ms); Timestamp timestamp(time_ms);
if (inputs->HasTag("KEY_FRAMES")) { if (inputs->HasTag(kKeyFramesTag)) {
auto key_frame = MakeImageFrameFromColor(GetRandomColor(), key_frame_width, auto key_frame = MakeImageFrameFromColor(GetRandomColor(), key_frame_width,
key_frame_height); key_frame_height);
inputs->Tag("KEY_FRAMES") inputs->Tag(kKeyFramesTag)
.packets.push_back(Adopt(key_frame.release()).At(timestamp)); .packets.push_back(Adopt(key_frame.release()).At(timestamp));
} }
if (randomize) { if (randomize) {
@ -252,11 +269,11 @@ void AddKeyFrameFeatures(const int64 time_ms, const int key_frame_width,
kMinNumDetections, kMaxNumDetections)(GetGen()); kMinNumDetections, kMaxNumDetections)(GetGen());
auto detections = auto detections =
MakeDetections(num_detections, key_frame_width, key_frame_height); MakeDetections(num_detections, key_frame_width, key_frame_height);
inputs->Tag("DETECTION_FEATURES") inputs->Tag(kDetectionFeaturesTag)
.packets.push_back(Adopt(detections.release()).At(timestamp)); .packets.push_back(Adopt(detections.release()).At(timestamp));
} else { } else {
auto detections = MakeCenterDetection(key_frame_width, key_frame_height); auto detections = MakeCenterDetection(key_frame_width, key_frame_height);
inputs->Tag("DETECTION_FEATURES") inputs->Tag(kDetectionFeaturesTag)
.packets.push_back(Adopt(detections.release()).At(timestamp)); .packets.push_back(Adopt(detections.release()).At(timestamp));
} }
} }
@ -272,19 +289,19 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
int64 time_ms = start_frame_index * kTimestampDiff; int64 time_ms = start_frame_index * kTimestampDiff;
for (int i = 0; i < num_scene_frames; ++i) { for (int i = 0; i < num_scene_frames; ++i) {
Timestamp timestamp(time_ms); Timestamp timestamp(time_ms);
if (inputs->HasTag("VIDEO_FRAMES")) { if (inputs->HasTag(kVideoFramesTag)) {
auto frame = auto frame =
MakeImageFrameFromColor(GetRandomColor(), frame_width, frame_height); MakeImageFrameFromColor(GetRandomColor(), frame_width, frame_height);
inputs->Tag("VIDEO_FRAMES") inputs->Tag(kVideoFramesTag)
.packets.push_back(Adopt(frame.release()).At(timestamp)); .packets.push_back(Adopt(frame.release()).At(timestamp));
} else { } else {
auto input_size = auto input_size =
::absl::make_unique<std::pair<int, int>>(frame_width, frame_height); ::absl::make_unique<std::pair<int, int>>(frame_width, frame_height);
inputs->Tag("VIDEO_SIZE") inputs->Tag(kVideoSizeTag)
.packets.push_back(Adopt(input_size.release()).At(timestamp)); .packets.push_back(Adopt(input_size.release()).At(timestamp));
} }
auto static_features = absl::make_unique<StaticFeatures>(); auto static_features = absl::make_unique<StaticFeatures>();
inputs->Tag("STATIC_FEATURES") inputs->Tag(kStaticFeaturesTag)
.packets.push_back(Adopt(static_features.release()).At(timestamp)); .packets.push_back(Adopt(static_features.release()).At(timestamp));
if (DownSampleRate == 1) { if (DownSampleRate == 1) {
AddKeyFrameFeatures(time_ms, key_frame_width, key_frame_height, false, AddKeyFrameFeatures(time_ms, key_frame_width, key_frame_height, false,
@ -294,7 +311,7 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
inputs); inputs);
} }
if (i == num_scene_frames - 1) { // adds shot boundary if (i == num_scene_frames - 1) { // adds shot boundary
inputs->Tag("SHOT_BOUNDARIES") inputs->Tag(kShotBoundariesTag)
.packets.push_back(Adopt(new bool(true)).At(Timestamp(time_ms))); .packets.push_back(Adopt(new bool(true)).At(Timestamp(time_ms)));
} }
time_ms += kTimestampDiff; time_ms += kTimestampDiff;
@ -306,8 +323,8 @@ void AddScene(const int start_frame_index, const int num_scene_frames,
void CheckCroppedFrames(const CalculatorRunner& runner, const int num_frames, void CheckCroppedFrames(const CalculatorRunner& runner, const int num_frames,
const int target_width, const int target_height) { const int target_width, const int target_height) {
const auto& outputs = runner.Outputs(); const auto& outputs = runner.Outputs();
EXPECT_TRUE(outputs.HasTag("CROPPED_FRAMES")); EXPECT_TRUE(outputs.HasTag(kCroppedFramesTag));
const auto& cropped_frames_outputs = outputs.Tag("CROPPED_FRAMES").packets; const auto& cropped_frames_outputs = outputs.Tag(kCroppedFramesTag).packets;
EXPECT_EQ(cropped_frames_outputs.size(), num_frames); EXPECT_EQ(cropped_frames_outputs.size(), num_frames);
for (int i = 0; i < num_frames; ++i) { for (int i = 0; i < num_frames; ++i) {
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>(); const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
@ -392,23 +409,23 @@ TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) {
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
const auto& outputs = runner->Outputs(); const auto& outputs = runner->Outputs();
EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES")); EXPECT_TRUE(outputs.HasTag(kKeyFrameCropRegionVizFramesTag));
EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES")); EXPECT_TRUE(outputs.HasTag(kSalientPointFrameVizFramesTag));
EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY")); EXPECT_TRUE(outputs.HasTag(kCroppingSummaryTag));
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_PER_FRAME")); EXPECT_TRUE(outputs.HasTag(kExternalRenderingPerFrameTag));
EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_FULL_VID")); EXPECT_TRUE(outputs.HasTag(kExternalRenderingFullVidTag));
EXPECT_TRUE(outputs.HasTag("FRAMING_DETECTIONS_VIZ_FRAMES")); EXPECT_TRUE(outputs.HasTag(kFramingDetectionsVizFramesTag));
const auto& crop_region_viz_frames_outputs = const auto& crop_region_viz_frames_outputs =
outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets; outputs.Tag(kKeyFrameCropRegionVizFramesTag).packets;
const auto& salient_point_viz_frames_outputs = const auto& salient_point_viz_frames_outputs =
outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets; outputs.Tag(kSalientPointFrameVizFramesTag).packets;
const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets; const auto& summary_output = outputs.Tag(kCroppingSummaryTag).packets;
const auto& ext_render_per_frame = const auto& ext_render_per_frame =
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets; outputs.Tag(kExternalRenderingPerFrameTag).packets;
const auto& ext_render_full_vid = const auto& ext_render_full_vid =
outputs.Tag("EXTERNAL_RENDERING_FULL_VID").packets; outputs.Tag(kExternalRenderingFullVidTag).packets;
const auto& framing_viz_frames_output = const auto& framing_viz_frames_output =
outputs.Tag("FRAMING_DETECTIONS_VIZ_FRAMES").packets; outputs.Tag(kFramingDetectionsVizFramesTag).packets;
EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames); EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames);
EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames); EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames);
EXPECT_EQ(framing_viz_frames_output.size(), num_frames); EXPECT_EQ(framing_viz_frames_output.size(), num_frames);
@ -597,7 +614,7 @@ TEST(SceneCroppingCalculatorTest, ProducesEvenFrameSize) {
kKeyFrameHeight, kDownSampleRate, runner->MutableInputs()); kKeyFrameHeight, kDownSampleRate, runner->MutableInputs());
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
const auto& output_frame = runner->Outputs() const auto& output_frame = runner->Outputs()
.Tag("CROPPED_FRAMES") .Tag(kCroppedFramesTag)
.packets[0] .packets[0]
.Get<ImageFrame>(); .Get<ImageFrame>();
EXPECT_EQ(output_frame.Width() % 2, 0); EXPECT_EQ(output_frame.Width() % 2, 0);
@ -646,7 +663,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
Timestamp timestamp(time_ms); Timestamp timestamp(time_ms);
auto frame = auto frame =
MakeImageFrameFromColor(GetRandomColor(), input_width, input_height); MakeImageFrameFromColor(GetRandomColor(), input_width, input_height);
inputs->Tag("VIDEO_FRAMES") inputs->Tag(kVideoFramesTag)
.packets.push_back(Adopt(frame.release()).At(timestamp)); .packets.push_back(Adopt(frame.release()).At(timestamp));
if (i % static_features_downsample_rate == 0) { if (i % static_features_downsample_rate == 0) {
auto static_features = absl::make_unique<StaticFeatures>(); auto static_features = absl::make_unique<StaticFeatures>();
@ -657,7 +674,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
color->set_g(green); color->set_g(green);
color->set_b(red); color->set_b(red);
} }
inputs->Tag("STATIC_FEATURES") inputs->Tag(kStaticFeaturesTag)
.packets.push_back(Adopt(static_features.release()).At(timestamp)); .packets.push_back(Adopt(static_features.release()).At(timestamp));
num_static_features++; num_static_features++;
} }
@ -672,7 +689,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
location->set_y(0); location->set_y(0);
location->set_width(80); location->set_width(80);
location->set_height(input_height); location->set_height(input_height);
inputs->Tag("DETECTION_FEATURES") inputs->Tag(kDetectionFeaturesTag)
.packets.push_back(Adopt(detections.release()).At(timestamp)); .packets.push_back(Adopt(detections.release()).At(timestamp));
} }
time_ms += kTimestampDiff; time_ms += kTimestampDiff;
@ -683,7 +700,7 @@ TEST(SceneCroppingCalculatorTest, PadsWithSolidColorFromStaticFeatures) {
// Checks that the top and bottom borders indeed have the background color. // Checks that the top and bottom borders indeed have the background color.
const int border_size = 37; const int border_size = 37;
const auto& cropped_frames_outputs = const auto& cropped_frames_outputs =
runner->Outputs().Tag("CROPPED_FRAMES").packets; runner->Outputs().Tag(kCroppedFramesTag).packets;
EXPECT_EQ(cropped_frames_outputs.size(), kSceneSize); EXPECT_EQ(cropped_frames_outputs.size(), kSceneSize);
for (int i = 0; i < kSceneSize; ++i) { for (int i = 0; i < kSceneSize; ++i) {
const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>(); const auto& cropped_frame = cropped_frames_outputs[i].Get<ImageFrame>();
@ -727,7 +744,7 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
auto mat = formats::MatView(frame.get()); auto mat = formats::MatView(frame.get());
mat(top_border_rect) = border_color; mat(top_border_rect) = border_color;
mat(bottom_border_rect) = border_color; mat(bottom_border_rect) = border_color;
inputs->Tag("VIDEO_FRAMES") inputs->Tag(kVideoFramesTag)
.packets.push_back(Adopt(frame.release()).At(timestamp)); .packets.push_back(Adopt(frame.release()).At(timestamp));
// Set borders in static features. // Set borders in static features.
auto static_features = absl::make_unique<StaticFeatures>(); auto static_features = absl::make_unique<StaticFeatures>();
@ -737,11 +754,11 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
auto* bottom_part = static_features->add_border(); auto* bottom_part = static_features->add_border();
bottom_part->set_relative_position(Border::BOTTOM); bottom_part->set_relative_position(Border::BOTTOM);
bottom_part->mutable_border_position()->set_height(bottom_border_size); bottom_part->mutable_border_position()->set_height(bottom_border_size);
inputs->Tag("STATIC_FEATURES") inputs->Tag(kStaticFeaturesTag)
.packets.push_back(Adopt(static_features.release()).At(timestamp)); .packets.push_back(Adopt(static_features.release()).At(timestamp));
// Add empty detections to ensure no padding is used. // Add empty detections to ensure no padding is used.
auto detections = absl::make_unique<DetectionSet>(); auto detections = absl::make_unique<DetectionSet>();
inputs->Tag("DETECTION_FEATURES") inputs->Tag(kDetectionFeaturesTag)
.packets.push_back(Adopt(detections.release()).At(timestamp)); .packets.push_back(Adopt(detections.release()).At(timestamp));
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
@ -749,7 +766,7 @@ TEST(SceneCroppingCalculatorTest, RemovesStaticBorders) {
// Checks that the top and bottom borders are removed. Each frame should have // Checks that the top and bottom borders are removed. Each frame should have
// solid color equal to frame color. // solid color equal to frame color.
const auto& cropped_frames_outputs = const auto& cropped_frames_outputs =
runner->Outputs().Tag("CROPPED_FRAMES").packets; runner->Outputs().Tag(kCroppedFramesTag).packets;
EXPECT_EQ(cropped_frames_outputs.size(), 1); EXPECT_EQ(cropped_frames_outputs.size(), 1);
const auto& cropped_frame = cropped_frames_outputs[0].Get<ImageFrame>(); const auto& cropped_frame = cropped_frames_outputs[0].Get<ImageFrame>();
const auto cropped_mat = formats::MatView(&cropped_frame); const auto cropped_mat = formats::MatView(&cropped_frame);
@ -775,7 +792,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessagePolyPath) {
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
const auto& outputs = runner->Outputs(); const auto& outputs = runner->Outputs();
const auto& ext_render_per_frame = const auto& ext_render_per_frame =
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets; outputs.Tag(kExternalRenderingPerFrameTag).packets;
EXPECT_EQ(ext_render_per_frame.size(), num_frames); EXPECT_EQ(ext_render_per_frame.size(), num_frames);
for (int i = 0; i < num_frames - 1; ++i) { for (int i = 0; i < num_frames - 1; ++i) {
@ -813,7 +830,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessageKinematicPath) {
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
const auto& outputs = runner->Outputs(); const auto& outputs = runner->Outputs();
const auto& ext_render_per_frame = const auto& ext_render_per_frame =
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets; outputs.Tag(kExternalRenderingPerFrameTag).packets;
EXPECT_EQ(ext_render_per_frame.size(), num_frames); EXPECT_EQ(ext_render_per_frame.size(), num_frames);
for (int i = 0; i < num_frames - 1; ++i) { for (int i = 0; i < num_frames - 1; ++i) {
@ -846,7 +863,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessagePolyPathNoVideo) {
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
const auto& outputs = runner->Outputs(); const auto& outputs = runner->Outputs();
const auto& ext_render_per_frame = const auto& ext_render_per_frame =
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets; outputs.Tag(kExternalRenderingPerFrameTag).packets;
EXPECT_EQ(ext_render_per_frame.size(), num_frames); EXPECT_EQ(ext_render_per_frame.size(), num_frames);
for (int i = 0; i < num_frames - 1; ++i) { for (int i = 0; i < num_frames - 1; ++i) {
@ -886,7 +903,7 @@ TEST(SceneCroppingCalculatorTest, OutputsCropMessageKinematicPathNoVideo) {
MP_EXPECT_OK(runner->Run()); MP_EXPECT_OK(runner->Run());
const auto& outputs = runner->Outputs(); const auto& outputs = runner->Outputs();
const auto& ext_render_per_frame = const auto& ext_render_per_frame =
outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets; outputs.Tag(kExternalRenderingPerFrameTag).packets;
EXPECT_EQ(ext_render_per_frame.size(), num_frames); EXPECT_EQ(ext_render_per_frame.size(), num_frames);
for (int i = 0; i < num_frames - 1; ++i) { for (int i = 0; i < num_frames - 1; ++i) {

View File

@ -43,6 +43,9 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kIsShotChangeTag[] = "IS_SHOT_CHANGE";
constexpr char kVideoTag[] = "VIDEO";
const char kConfig[] = R"( const char kConfig[] = R"(
calculator: "ShotBoundaryCalculator" calculator: "ShotBoundaryCalculator"
input_stream: "VIDEO:camera_frames" input_stream: "VIDEO:camera_frames"
@ -70,7 +73,7 @@ void AddFrames(const int number_of_frames, const std::set<int>& skip_frames,
if (skip_frames.count(i) < 1) { if (skip_frames.count(i) < 1) {
sub_image.copyTo(frame_area); sub_image.copyTo(frame_area);
} }
runner->MutableInputs()->Tag("VIDEO").packets.push_back( runner->MutableInputs()->Tag(kVideoTag).packets.push_back(
Adopt(input_frame.release()).At(Timestamp(i * 1000000))); Adopt(input_frame.release()).At(Timestamp(i * 1000000)));
} }
} }
@ -97,7 +100,7 @@ TEST(ShotBoundaryCalculatorTest, NoShotChange) {
AddFrames(10, {}, runner.get()); AddFrames(10, {}, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckOutput(10, {}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets); CheckOutput(10, {}, runner->Outputs().Tag(kIsShotChangeTag).packets);
} }
TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) { TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
@ -110,7 +113,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeSingle) {
AddFrames(20, {10}, runner.get()); AddFrames(20, {10}, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckOutput(20, {10}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets); CheckOutput(20, {10}, runner->Outputs().Tag(kIsShotChangeTag).packets);
} }
TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) { TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
@ -123,7 +126,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeDouble) {
AddFrames(20, {14, 17}, runner.get()); AddFrames(20, {14, 17}, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckOutput(20, {14, 17}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets); CheckOutput(20, {14, 17}, runner->Outputs().Tag(kIsShotChangeTag).packets);
} }
TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) { TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
@ -140,7 +143,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeFiltered) {
AddFrames(24, {16, 19}, runner.get()); AddFrames(24, {16, 19}, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
CheckOutput(24, {16}, runner->Outputs().Tag("IS_SHOT_CHANGE").packets); CheckOutput(24, {16}, runner->Outputs().Tag(kIsShotChangeTag).packets);
} }
TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) { TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
@ -153,7 +156,7 @@ TEST(ShotBoundaryCalculatorTest, ShotChangeSingleOnOnChange) {
AddFrames(20, {15}, runner.get()); AddFrames(20, {15}, runner.get());
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
auto output_packets = runner->Outputs().Tag("IS_SHOT_CHANGE").packets; auto output_packets = runner->Outputs().Tag(kIsShotChangeTag).packets;
ASSERT_EQ(output_packets.size(), 1); ASSERT_EQ(output_packets.size(), 1);
ASSERT_EQ(output_packets[0].Get<bool>(), true); ASSERT_EQ(output_packets[0].Get<bool>(), true);
ASSERT_EQ(output_packets[0].Timestamp().Value(), 15000000); ASSERT_EQ(output_packets[0].Timestamp().Value(), 15000000);
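
For reference, AddFrames stamps frame i at Timestamp(i * 1000000), i.e. one frame per second in microseconds, which is why a shot change injected at frame 15 surfaces as a single packet whose timestamp value is 15000000. A trivial sketch of that arithmetic:

#include <cstdint>
#include <cstdio>

// Frame index to MediaPipe-style microsecond timestamp at 1 frame per second.
int64_t FrameTimestampUs(int frame_index) {
  return static_cast<int64_t>(frame_index) * 1000000;
}

int main() {
  std::printf("%lld\n", static_cast<long long>(FrameTimestampUs(15)));  // 15000000
}
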

View File

@ -32,6 +32,9 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kOutputTag[] = "OUTPUT";
constexpr char kIsShotBoundaryTag[] = "IS_SHOT_BOUNDARY";
const char kConfigA[] = R"( const char kConfigA[] = R"(
calculator: "SignalFusingCalculator" calculator: "SignalFusingCalculator"
input_stream: "scene_change" input_stream: "scene_change"
@ -160,7 +163,7 @@ TEST(SignalFusingCalculatorTest, TwoInputShotLabeledTags) {
auto input_shot = absl::make_unique<bool>(false); auto input_shot = absl::make_unique<bool>(false);
runner->MutableInputs() runner->MutableInputs()
->Tag("IS_SHOT_BOUNDARY") ->Tag(kIsShotBoundaryTag)
.packets.push_back(Adopt(input_shot.release()).At(Timestamp(0))); .packets.push_back(Adopt(input_shot.release()).At(Timestamp(0)));
auto input_face = auto input_face =
@ -200,7 +203,7 @@ TEST(SignalFusingCalculatorTest, TwoInputShotLabeledTags) {
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("OUTPUT").packets; runner->Outputs().Tag(kOutputTag).packets;
const auto& detection_set = output_packets[0].Get<DetectionSet>(); const auto& detection_set = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(detection_set.detections().size(), 4); ASSERT_EQ(detection_set.detections().size(), 4);
@ -251,7 +254,7 @@ TEST(SignalFusingCalculatorTest, TwoInputNoShotLabeledTags) {
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const std::vector<Packet>& output_packets = const std::vector<Packet>& output_packets =
runner->Outputs().Tag("OUTPUT").packets; runner->Outputs().Tag(kOutputTag).packets;
const auto& detection_set = output_packets[0].Get<DetectionSet>(); const auto& detection_set = output_packets[0].Get<DetectionSet>();
ASSERT_EQ(detection_set.detections().size(), 4); ASSERT_EQ(detection_set.detections().size(), 4);

View File

@ -31,6 +31,9 @@ namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
constexpr char kOutputFramesTag[] = "OUTPUT_FRAMES";
constexpr char kInputFramesTag[] = "INPUT_FRAMES";
// Default configuration of the calculator. // Default configuration of the calculator.
CalculatorGraphConfig::Node GetCalculatorNode( CalculatorGraphConfig::Node GetCalculatorNode(
const std::string& fail_if_any, const std::string& extra_options = "") { const std::string& fail_if_any, const std::string& extra_options = "") {
@ -65,10 +68,10 @@ TEST(VideoFilterCalculatorTest, UpperBoundNoPass) {
ImageFormat::SRGB, kFixedWidth, ImageFormat::SRGB, kFixedWidth,
static_cast<int>(kFixedWidth / kAspectRatio), 16); static_cast<int>(kFixedWidth / kAspectRatio), 16);
runner->MutableInputs() runner->MutableInputs()
->Tag("INPUT_FRAMES") ->Tag(kInputFramesTag)
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000))); .packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets; const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
EXPECT_TRUE(output_packet.empty()); EXPECT_TRUE(output_packet.empty());
} }
@ -88,10 +91,10 @@ TEST(VerticalFrameRemovalCalculatorTest, UpperBoundPass) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
runner->MutableInputs() runner->MutableInputs()
->Tag("INPUT_FRAMES") ->Tag(kInputFramesTag)
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000))); .packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets; const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
EXPECT_EQ(1, output_packet.size()); EXPECT_EQ(1, output_packet.size());
auto& output_frame = output_packet[0].Get<ImageFrame>(); auto& output_frame = output_packet[0].Get<ImageFrame>();
EXPECT_EQ(kWidth, output_frame.Width()); EXPECT_EQ(kWidth, output_frame.Width());
@ -114,10 +117,10 @@ TEST(VideoFilterCalculatorTest, LowerBoundNoPass) {
ImageFormat::SRGB, kFixedWidth, ImageFormat::SRGB, kFixedWidth,
static_cast<int>(kFixedWidth / kAspectRatio), 16); static_cast<int>(kFixedWidth / kAspectRatio), 16);
runner->MutableInputs() runner->MutableInputs()
->Tag("INPUT_FRAMES") ->Tag(kInputFramesTag)
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000))); .packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets; const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
EXPECT_TRUE(output_packet.empty()); EXPECT_TRUE(output_packet.empty());
} }
@ -137,10 +140,10 @@ TEST(VerticalFrameRemovalCalculatorTest, LowerBoundPass) {
auto input_frame = auto input_frame =
::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16); ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, kWidth, kHeight, 16);
runner->MutableInputs() runner->MutableInputs()
->Tag("INPUT_FRAMES") ->Tag(kInputFramesTag)
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000))); .packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
MP_ASSERT_OK(runner->Run()); MP_ASSERT_OK(runner->Run());
const auto& output_packet = runner->Outputs().Tag("OUTPUT_FRAMES").packets; const auto& output_packet = runner->Outputs().Tag(kOutputFramesTag).packets;
EXPECT_EQ(1, output_packet.size()); EXPECT_EQ(1, output_packet.size());
auto& output_frame = output_packet[0].Get<ImageFrame>(); auto& output_frame = output_packet[0].Get<ImageFrame>();
EXPECT_EQ(kWidth, output_frame.Width()); EXPECT_EQ(kWidth, output_frame.Width());
@ -164,7 +167,7 @@ TEST(VerticalFrameRemovalCalculatorTest, OutputError) {
ImageFormat::SRGB, kFixedWidth, ImageFormat::SRGB, kFixedWidth,
static_cast<int>(kFixedWidth / kAspectRatio), 16); static_cast<int>(kFixedWidth / kAspectRatio), 16);
runner->MutableInputs() runner->MutableInputs()
->Tag("INPUT_FRAMES") ->Tag(kInputFramesTag)
.packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000))); .packets.push_back(Adopt(input_frame.release()).At(Timestamp(1000)));
absl::Status status = runner->Run(); absl::Status status = runner->Run();
EXPECT_EQ(status.code(), absl::StatusCode::kUnknown); EXPECT_EQ(status.code(), absl::StatusCode::kUnknown);

View File

@ -1,5 +1,7 @@
#include "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.h" #include "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.h"
constexpr float kMinVelocity = 0.5;
namespace mediapipe { namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
@ -75,6 +77,7 @@ absl::Status KinematicPathSolver::AddObservation(int position,
current_position_px_ = position; current_position_px_ = position;
} }
target_position_px_ = position; target_position_px_ = position;
prior_position_px_ = current_position_px_;
motion_state_ = false; motion_state_ = false;
mean_delta_t_ = -1; mean_delta_t_ = -1;
raw_positions_at_time_.push_front( raw_positions_at_time_.push_front(
@ -106,6 +109,11 @@ absl::Status KinematicPathSolver::AddObservation(int position,
options_.reframe_window()) options_.reframe_window())
<< "Reframe window cannot exceed min_motion_to_reframe."; << "Reframe window cannot exceed min_motion_to_reframe.";
} }
RET_CHECK(options_.has_max_velocity() ^
(options_.has_max_velocity_scale() &&
options_.has_max_velocity_shift()))
<< "Must either set max_velocity or set both max_velocity_scale and "
"max_velocity_shift.";
return absl::OkStatus(); return absl::OkStatus();
} }
@ -123,9 +131,29 @@ absl::Status KinematicPathSolver::AddObservation(int position,
} }
int filtered_position = Median(raw_positions_at_time_); int filtered_position = Median(raw_positions_at_time_);
float min_reframe = (options_.has_min_motion_to_reframe()
? options_.min_motion_to_reframe()
: options_.min_motion_to_reframe_lower()) *
pixels_per_degree_;
float max_reframe = (options_.has_min_motion_to_reframe()
? options_.min_motion_to_reframe()
: options_.min_motion_to_reframe_upper()) *
pixels_per_degree_;
filtered_position = fmax(min_location_ - min_reframe, filtered_position);
filtered_position = fmin(max_location_ + max_reframe, filtered_position);
double delta_degs = double delta_degs =
(filtered_position - current_position_px_) / pixels_per_degree_; (filtered_position - current_position_px_) / pixels_per_degree_;
double max_velocity =
options_.has_max_velocity()
? options_.max_velocity()
: fmax(abs(delta_degs * options_.max_velocity_scale()) +
options_.max_velocity_shift(),
kMinVelocity);
// If the motion is smaller than the min_motion_to_reframe and camera is // If the motion is smaller than the min_motion_to_reframe and camera is
// stationary, don't use the update. // stationary, don't use the update.
if (IsMotionTooSmall(delta_degs) && !motion_state_) { if (IsMotionTooSmall(delta_degs) && !motion_state_) {
@ -169,10 +197,9 @@ absl::Status KinematicPathSolver::AddObservation(int position,
options_.max_update_rate()); options_.max_update_rate());
double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) + double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
observed_velocity * update_rate; observed_velocity * update_rate;
// Limited current velocity.
current_velocity_deg_per_s_ =
updated_velocity > 0 ? fmin(updated_velocity, options_.max_velocity())
: fmax(updated_velocity, -options_.max_velocity());
current_velocity_deg_per_s_ = updated_velocity > 0
? fmin(updated_velocity, max_velocity)
: fmax(updated_velocity, -max_velocity);
// Update prediction based on time input. // Update prediction based on time input.
return UpdatePrediction(time_us); return UpdatePrediction(time_us);
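
Two behaviors change in this part of AddObservation. First, the filtered target position is clamped to the allowed range widened by the reframe threshold converted to pixels. Second, when max_velocity is not configured, the velocity cap is derived from the observed motion as max(|delta_degs * max_velocity_scale| + max_velocity_shift, 0.5), with the earlier RET_CHECK enforcing that callers set either max_velocity or both scale and shift. A standalone sketch of that arithmetic; the numbers in main are illustrative only, and a single reframe threshold is used here even though the real code allows separate lower and upper values:

#include <algorithm>
#include <cmath>
#include <cstdio>

constexpr float kMinVelocity = 0.5f;  // same floor as the constant added above

// Adaptive velocity cap used when max_velocity is not set.
double AdaptiveMaxVelocity(double delta_degs, double scale, double shift) {
  return std::fmax(std::fabs(delta_degs * scale) + shift, kMinVelocity);
}

// Keep the filtered target inside the frame bounds widened by the reframe
// threshold expressed in pixels.
double ClampFilteredPosition(double filtered, double min_location,
                             double max_location, double reframe_px) {
  return std::clamp(filtered, min_location - reframe_px,
                    max_location + reframe_px);
}

int main() {
  // Hypothetical numbers: 4 degrees of observed motion with scale 0.5 and
  // shift 1.0 allow at most 3 deg/s, so a smoothed velocity of 5.2 is clamped.
  double max_velocity = AdaptiveMaxVelocity(4.0, 0.5, 1.0);            // 3.0
  double clamped_velocity = std::clamp(5.2, -max_velocity, max_velocity);  // 3.0
  double clamped_position = ClampFilteredPosition(1040.0, 0.0, 1000.0, 50.0);
  std::printf("%.1f %.1f %.1f\n", max_velocity, clamped_velocity,
              clamped_position);
}
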
@ -182,6 +209,9 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) {
RET_CHECK(current_time_ < time_us) RET_CHECK(current_time_ < time_us)
<< "Prediction time added before a prior observation or prediction."; << "Prediction time added before a prior observation or prediction.";
// Store prior pixel location.
prior_position_px_ = current_position_px_;
// Position update limited by min/max. // Position update limited by min/max.
double update_position_px = double update_position_px =
current_position_px_ + current_position_px_ +
@ -209,7 +239,19 @@ absl::Status KinematicPathSolver::GetState(int* position) {
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status KinematicPathSolver::SetState(const int position) { absl::Status KinematicPathSolver::GetState(float* position) {
RET_CHECK(initialized_) << "GetState called before first observation added.";
*position = current_position_px_;
return absl::OkStatus();
}
absl::Status KinematicPathSolver::GetDeltaState(float* delta_position) {
RET_CHECK(initialized_) << "GetState called before first observation added.";
*delta_position = current_position_px_ - prior_position_px_;
return absl::OkStatus();
}
absl::Status KinematicPathSolver::SetState(const float position) {
RET_CHECK(initialized_) << "SetState called before first observation added."; RET_CHECK(initialized_) << "SetState called before first observation added.";
current_position_px_ = position; current_position_px_ = position;
return absl::OkStatus(); return absl::OkStatus();
@ -218,7 +260,15 @@ absl::Status KinematicPathSolver::SetState(const int position) {
absl::Status KinematicPathSolver::GetTargetPosition(int* target_position) { absl::Status KinematicPathSolver::GetTargetPosition(int* target_position) {
RET_CHECK(initialized_) RET_CHECK(initialized_)
<< "GetTargetPosition called before first observation added."; << "GetTargetPosition called before first observation added.";
*target_position = round(target_position_px_);
// Provide target position clamped by min/max locations.
if (target_position_px_ < min_location_) {
*target_position = min_location_;
} else if (target_position_px_ > max_location_) {
*target_position = max_location_;
} else {
*target_position = round(target_position_px_);
}
return absl::OkStatus(); return absl::OkStatus();
} }
@ -238,6 +288,7 @@ absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
double updated_distance = max_location - min_location; double updated_distance = max_location - min_location;
double scale_change = updated_distance / prior_distance; double scale_change = updated_distance / prior_distance;
current_position_px_ = current_position_px_ * scale_change; current_position_px_ = current_position_px_ * scale_change;
prior_position_px_ = prior_position_px_ * scale_change;
target_position_px_ = target_position_px_ * scale_change; target_position_px_ = target_position_px_ * scale_change;
max_location_ = max_location; max_location_ = max_location;
min_location_ = min_location; min_location_ = min_location;
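The hunks above turn the velocity cap from a fixed constant into one that can grow with how far the detected target sits from the current camera position. A minimal standalone sketch of that rule, not taken from the diff (the function name and flattened parameters are illustrative; kMinVelocity, max_velocity_scale and max_velocity_shift follow the code above):

#include <algorithm>
#include <cmath>

// Mirrors the cap computed in AddObservation(): either the fixed
// options_.max_velocity(), or |delta_degs| * scale + shift, floored at
// kMinVelocity (0.5 deg/s in this change).
double EffectiveMaxVelocity(bool has_fixed_max, double fixed_max,
                            double delta_degs, double scale, double shift) {
  constexpr double kMinVelocity = 0.5;
  if (has_fixed_max) return fixed_max;
  return std::max(std::abs(delta_degs * scale) + shift, kMinVelocity);
}

With scale 0.4 and shift -2.0, a 30 degree offset yields a 10 deg/s cap, which is what the new PassMaxVelocityScale test further down relies on.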

@@ -46,10 +46,12 @@ class KinematicPathSolver {
   absl::Status AddObservation(int position, const uint64 time_us);
   // Get the predicted position at a time.
   absl::Status UpdatePrediction(const int64 time_us);
-  // Get the state at a time.
+  // Get the state at a time, as an int.
   absl::Status GetState(int* position);
+  // Get the state at a time, as a float.
+  absl::Status GetState(float* position);
   // Overwrite the current state value.
-  absl::Status SetState(const int position);
+  absl::Status SetState(const float position);
   // Update PixelPerDegree value.
   absl::Status UpdatePixelsPerDegree(const float pixels_per_degree);
   // Provide the current target position of the reframe action.
@@ -66,6 +68,8 @@ class KinematicPathSolver {
   // Clear any history buffer of positions that are used when
   // filtering_time_window_us is set to a non-zero value.
   void ClearHistory();
+  // Provides the change in position from last state.
+  absl::Status GetDeltaState(float* delta_position);
 
  private:
   // Tuning options.
@@ -77,6 +81,7 @@ class KinematicPathSolver {
   float pixels_per_degree_;
   // Current state values.
   double current_position_px_;
+  double prior_position_px_;
   double current_velocity_deg_per_s_;
   uint64 current_time_;
   // History of observations (second) and their time (first).
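With these declarations, callers can read back sub-pixel camera state and the per-update delta. A hedged usage sketch, not from the diff; it assumes the kinematic_path_solver.h include, the mediapipe::autoflip namespace, and MediaPipe's MP_RETURN_IF_ERROR macro, and the option value is only there to satisfy the new max_velocity check:

absl::Status ReadFloatState() {
  KinematicOptions options;
  options.set_max_velocity(18);  // either max_velocity or scale + shift must be set
  KinematicPathSolver solver(options, /*min_location=*/0,
                             /*max_location=*/1000,
                             /*pixels_per_degree=*/1000.0 / 60.0);
  MP_RETURN_IF_ERROR(solver.AddObservation(500, /*time_us=*/0));
  MP_RETURN_IF_ERROR(solver.AddObservation(520, /*time_us=*/1000000));
  float position = 0.0f;
  float delta = 0.0f;
  MP_RETURN_IF_ERROR(solver.GetState(&position));    // new float overload
  MP_RETURN_IF_ERROR(solver.GetDeltaState(&delta));  // change since the prior state
  return solver.SetState(position);                  // SetState() now takes a float
}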

@@ -6,8 +6,9 @@ message KinematicOptions {
   // Weighted update of new camera velocity (measurement) vs current state
   // (prediction).
   optional double update_rate = 1 [default = 0.5, deprecated = true];
-  // Max velocity (degrees per second) that the camera can move.
-  optional double max_velocity = 2 [default = 18];
+  // Max velocity (degrees per second) that the camera can move. Cannot be used
+  // with max_velocity_scale or max_velocity_shift.
+  optional double max_velocity = 2;
   // Min motion (in degrees) to react for both upper and lower directions. Must
   // not be set if using min_motion_to_reframe_lower and
   // min_motion_to_reframe_upper.
@@ -30,4 +31,12 @@ message KinematicOptions {
   optional int64 filtering_time_window_us = 7 [default = 0];
   // Weighted update of average period, used for motion updates.
   optional float mean_period_update_rate = 8 [default = 0.25];
+  // Scale factor for max velocity, to be multiplied by the distance from center
+  // in degrees. Cannot be used with max_velocity and must be used with
+  // max_velocity_shift.
+  optional float max_velocity_scale = 11;
+  // Shift factor for max velocity, to be added to the scaled distance from
+  // center in degrees. Cannot be used with max_velocity and must be used with
+  // max_velocity_scale.
+  optional float max_velocity_shift = 12;
 }
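The two new fields make the velocity cap proportional to the reframe distance instead of a constant, and the solver now enforces that exactly one of the two schemes is configured. A short C++ sketch of each choice (the namespace is assumed from the surrounding autoflip sources):

mediapipe::autoflip::KinematicOptions MakeOptions(bool adaptive_cap) {
  mediapipe::autoflip::KinematicOptions options;
  options.set_min_motion_to_reframe(1.0);
  if (adaptive_cap) {
    // Cap grows with the distance from center: |delta_degs| * scale + shift.
    options.set_max_velocity_scale(0.5);
    options.set_max_velocity_shift(-1.0);
  } else {
    // Fixed cap in degrees per second; do not combine with the fields above.
    options.set_max_velocity(18.0);
  }
  return options;
}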

@@ -36,7 +36,7 @@ TEST(KinematicPathSolverTest, FailZeroPixelsPerDegree) {
 
 TEST(KinematicPathSolverTest, FailNotInitializedState) {
   KinematicOptions options;
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   EXPECT_FALSE(solver.GetState(&state).ok());
 }
@@ -55,13 +55,13 @@ TEST(KinematicPathSolverTest, PassNotEnoughMotionLargeImg) {
   options.set_max_velocity(1000);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to not move.
-  EXPECT_EQ(state, 500);
+  EXPECT_FLOAT_EQ(state, 500);
 }
 
 TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {
@@ -72,13 +72,13 @@ TEST(KinematicPathSolverTest, PassNotEnoughMotionSmallImg) {
   options.set_max_velocity(500);
   // Set degrees / pixel to 8.3
   KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
   // Move target by 10px / 8.3 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to not move.
-  EXPECT_EQ(state, 400);
+  EXPECT_FLOAT_EQ(state, 400);
 }
 
 TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
@@ -90,7 +90,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
   options.set_filtering_time_window_us(3000000);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));
@@ -98,7 +98,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionFiltered) {
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to not move.
-  EXPECT_EQ(state, 500);
+  EXPECT_FLOAT_EQ(state, 500);
 }
 
 TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
@@ -110,7 +110,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
   options.set_filtering_time_window_us(0);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 1));
@@ -118,7 +118,7 @@ TEST(KinematicPathSolverTest, PassEnoughMotionNotFiltered) {
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 3));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to not move.
-  EXPECT_EQ(state, 506);
+  EXPECT_FLOAT_EQ(state, 506.4);
 }
 
 TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {
@@ -130,13 +130,13 @@ TEST(KinematicPathSolverTest, PassEnoughMotionLargeImg) {
   options.set_max_velocity(1000);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to move.
-  EXPECT_EQ(state, 520);
+  EXPECT_FLOAT_EQ(state, 520);
 }
 
 TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {
@@ -148,13 +148,13 @@ TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) {
   options.set_max_velocity(18);
   // Set degrees / pixel to 8.3
   KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
   // Move target by 10px / 8.3 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to move.
-  EXPECT_EQ(state, 410);
+  EXPECT_FLOAT_EQ(state, 410);
 }
 
 TEST(KinematicPathSolverTest, FailReframeWindowSetting) {
@@ -181,13 +181,13 @@ TEST(KinematicPathSolverTest, PassReframeWindow) {
   options.set_reframe_window(0.75);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to move 1.2-.75 deg, * 16.6 = 7.47px + 500 =
-  EXPECT_EQ(state, 508);
+  EXPECT_FLOAT_EQ(state, 507.5);
 }
 
 TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {
@@ -202,17 +202,17 @@ TEST(KinematicPathSolverTest, PassReframeWindowLowerUpper) {
   options.set_reframe_window(0.75);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to not move
-  EXPECT_EQ(state, 500);
+  EXPECT_FLOAT_EQ(state, 500);
   MP_ASSERT_OK(solver.AddObservation(480, kMicroSecInSec * 2));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to move
-  EXPECT_EQ(state, 493);
+  EXPECT_FLOAT_EQ(state, 492.5);
 }
 
 TEST(KinematicPathSolverTest, PassCheckState) {
@@ -241,12 +241,12 @@ TEST(KinematicPathSolverTest, PassUpdateRate30FPS) {
   options.set_max_update_rate(0.8);
   options.set_max_velocity(18);
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 30));
   MP_ASSERT_OK(solver.GetState(&state));
   // (0.033 / .25) * 20 =
-  EXPECT_EQ(state, 503);
+  EXPECT_FLOAT_EQ(state, 502.6667);
 }
 
 TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {
@@ -256,12 +256,12 @@ TEST(KinematicPathSolverTest, PassUpdateRate10FPS) {
   options.set_max_update_rate(0.8);
   options.set_max_velocity(18);
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1 / 10));
   MP_ASSERT_OK(solver.GetState(&state));
   // (0.1 / .25) * 20 =
-  EXPECT_EQ(state, 508);
+  EXPECT_FLOAT_EQ(state, 508);
 }
 
 TEST(KinematicPathSolverTest, PassUpdateRate) {
@@ -271,7 +271,8 @@ TEST(KinematicPathSolverTest, PassUpdateRate) {
   options.set_max_update_rate(1.0);
   options.set_max_velocity(18);
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state, target_position;
+  int target_position;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
   EXPECT_EQ(target_position, 500);
@@ -279,7 +280,7 @@ TEST(KinematicPathSolverTest, PassUpdateRate) {
   MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
   EXPECT_EQ(target_position, 520);
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 505);
+  EXPECT_FLOAT_EQ(state, 505);
 }
 
 TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
@@ -289,7 +290,8 @@ TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
   options.set_max_update_rate(1.0);
   options.set_max_velocity(18);
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state, target_position;
+  int target_position;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
   EXPECT_EQ(target_position, 500);
@@ -299,10 +301,10 @@ TEST(KinematicPathSolverTest, PassUpdateRateResolutionChange) {
   MP_ASSERT_OK(solver.GetTargetPosition(&target_position));
   EXPECT_EQ(target_position, 520 * 0.5);
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 253);
+  EXPECT_FLOAT_EQ(state, 252.5);
 }
 
-TEST(KinematicPathSolverTest, PassMaxVelocity) {
+TEST(KinematicPathSolverTest, PassMaxVelocityInt) {
   KinematicOptions options;
   options.set_min_motion_to_reframe(1.0);
   options.set_update_rate(1.0);
@@ -315,6 +317,33 @@ TEST(KinematicPathSolverTest, PassMaxVelocity) {
   EXPECT_EQ(state, 600);
 }
 
+TEST(KinematicPathSolverTest, PassMaxVelocity) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  float state;
+  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, kMicroSecInSec * 1));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_FLOAT_EQ(state, 600);
+}
+
+TEST(KinematicPathSolverTest, PassMaxVelocityScale) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity_scale(0.4);
+  options.set_max_velocity_shift(-2.0);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  float state;
+  MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, kMicroSecInSec * 1));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_FLOAT_EQ(state, 666.6667);
+}
+
 TEST(KinematicPathSolverTest, PassDegPerPxChange) {
   KinematicOptions options;
   // Set min motion to 2deg
@@ -323,18 +352,18 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) {
   options.set_max_velocity(1000);
   // Set degrees / pixel to 16.6
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
   // Move target by 20px / 16.6 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to not move.
-  EXPECT_EQ(state, 500);
+  EXPECT_FLOAT_EQ(state, 500);
   MP_ASSERT_OK(solver.UpdatePixelsPerDegree(500.0 / kWidthFieldOfView));
   MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 2));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to move.
-  EXPECT_EQ(state, 516);
+  EXPECT_FLOAT_EQ(state, 516);
 }
 
 TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
@@ -344,14 +373,14 @@ TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
   options.set_max_velocity(6);
   options.set_mean_period_update_rate(1.0);
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, 0));
   MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 600);
+  EXPECT_FLOAT_EQ(state, 600);
   MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 720);
+  EXPECT_FLOAT_EQ(state, 720);
 }
 
 TEST(KinematicPathSolverTest, TimestampSmoothing) {
@@ -361,14 +390,14 @@ TEST(KinematicPathSolverTest, TimestampSmoothing) {
   options.set_max_velocity(6);
   options.set_mean_period_update_rate(0.05);
   KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(500, 0));
   MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 600);
+  EXPECT_FLOAT_EQ(state, 600);
   MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 701);
+  EXPECT_FLOAT_EQ(state, 701);
 }
 
 TEST(KinematicPathSolverTest, PassSetPosition) {
@@ -380,16 +409,30 @@ TEST(KinematicPathSolverTest, PassSetPosition) {
   options.set_max_velocity(18);
   // Set degrees / pixel to 8.3
   KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
-  int state;
+  float state;
   MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
   // Move target by 10px / 8.3 = 1.2deg
   MP_ASSERT_OK(solver.AddObservation(410, kMicroSecInSec * 1));
   MP_ASSERT_OK(solver.GetState(&state));
   // Expect cam to move.
-  EXPECT_EQ(state, 410);
+  EXPECT_FLOAT_EQ(state, 410);
   MP_ASSERT_OK(solver.SetState(400));
   MP_ASSERT_OK(solver.GetState(&state));
-  EXPECT_EQ(state, 400);
+  EXPECT_FLOAT_EQ(state, 400);
+}
+
+TEST(KinematicPathSolverTest, PassBorderTest) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_max_update_rate(0.25);
+  options.set_max_velocity_scale(0.5);
+  options.set_max_velocity_shift(-1.0);
+  KinematicPathSolver solver(options, 0, 500, 500.0 / kWidthFieldOfView);
+  float state;
+  MP_ASSERT_OK(solver.AddObservation(400, kMicroSecInSec * 0));
+  MP_ASSERT_OK(solver.AddObservation(800, kMicroSecInSec * 0.1));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_FLOAT_EQ(state, 404.56668);
 }
 
 }  // namespace
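The expectation in PassMaxVelocityScale above follows directly from the new cap formula; a small derivation, assuming kWidthFieldOfView is 60 (the tests' "degrees / pixel to 16.6" comments imply 1000 px across 60 degrees):

#include <algorithm>
#include <cmath>

// Recomputes the 666.6667 expected by PassMaxVelocityScale; not part of the diff.
double ExpectedMaxVelocityScaleState() {
  const double px_per_deg = 1000.0 / 60.0;                  // ~16.67 px per degree
  const double delta_degs = (1000.0 - 500.0) / px_per_deg;  // 30 degrees observed in 1 s
  const double cap_deg_per_s =
      std::max(std::abs(delta_degs * 0.4) + (-2.0), 0.5);   // 12 - 2 = 10 deg/s
  return 500.0 + cap_deg_per_s * px_per_deg;                // 500 + 166.67 = 666.67 px
}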

@@ -148,18 +148,18 @@ class SourceImpl {
   explicit SourceImpl(std::vector<std::unique_ptr<Base>>* vec)
       : SourceImpl(&GetWithAutoGrow(vec, 0)) {}
-  explicit SourceImpl(SourceBase* base) : base_(*base) {}
+  explicit SourceImpl(SourceBase* base) : base_(base) {}
 
   template <typename U,
             typename std::enable_if<AllowConnection<U>{}, int>::type = 0>
   Src& AddTarget(const Dst<U>& dest) {
     CHECK(dest.base_.source == nullptr);
-    dest.base_.source = &base_;
-    base_.dests_.emplace_back(&dest.base_);
+    dest.base_.source = base_;
+    base_->dests_.emplace_back(&dest.base_);
     return *this;
   }
   Src& SetName(std::string name) {
-    base_.name_ = std::move(name);
+    base_->name_ = std::move(name);
     return *this;
   }
   template <typename U>
@@ -168,7 +168,8 @@ class SourceImpl {
   }
 
  private:
-  SourceBase& base_;
+  // Never null.
+  SourceBase* base_;
 };
 
 template <bool IsSide, typename T>
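Storing SourceBase through a pointer rather than a reference is what lets Source and SideSource objects be reassigned, which the new CopyableSource test in the next file exercises. A self-contained illustration of the underlying language rule (the struct names here are made up):

#include <type_traits>

struct SourceBase {};

struct RefMember { SourceBase& base; };  // reference member: operator= implicitly deleted
struct PtrMember { SourceBase* base; };  // pointer member: default memberwise assignment

static_assert(!std::is_copy_assignable<RefMember>::value,
              "a reference data member deletes copy assignment");
static_assert(std::is_copy_assignable<PtrMember>::value,
              "a pointer data member keeps copy assignment");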

@@ -1,5 +1,7 @@
 #include "mediapipe/framework/api2/builder.h"
 
+#include <functional>
+
 #include "absl/strings/substitute.h"
 #include "mediapipe/framework/api2/node.h"
 #include "mediapipe/framework/api2/packet.h"
@@ -46,6 +48,88 @@ TEST(BuilderTest, BuildGraph) {
   EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
 }
 
+TEST(BuilderTest, CopyableSource) {
+  builder::Graph graph;
+  builder::Source<false, int> a = graph[Input<int>("A")];
+  a.SetName("a");
+  builder::Source<false, int> b = graph[Input<int>("B")];
+  b.SetName("b");
+  builder::SideSource<false, float> side_a = graph[SideInput<float>("SIDE_A")];
+  side_a.SetName("side_a");
+  builder::SideSource<false, float> side_b = graph[SideInput<float>("SIDE_B")];
+  side_b.SetName("side_b");
+  builder::Destination<false, int> out = graph[Output<int>("OUT")];
+  builder::SideDestination<false, float> side_out =
+      graph[SideOutput<float>("SIDE_OUT")];
+
+  builder::Source<false, int> input = a;
+  input = b;
+  builder::SideSource<false, float> side_input = side_b;
+  side_input = side_a;
+
+  input >> out;
+  side_input >> side_out;
+
+  CalculatorGraphConfig expected =
+      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
+        input_stream: "A:a"
+        input_stream: "B:b"
+        output_stream: "OUT:b"
+        input_side_packet: "SIDE_A:side_a"
+        input_side_packet: "SIDE_B:side_b"
+        output_side_packet: "SIDE_OUT:side_a"
+      )pb");
+  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
+}
+
+TEST(BuilderTest, BuildGraphWithFunctions) {
+  builder::Graph graph;
+
+  builder::Source<false, int> base = graph[Input<int>("IN")];
+  base.SetName("base");
+  builder::SideSource<false, float> side = graph[SideInput<float>("SIDE")];
+  side.SetName("side");
+
+  auto foo_fn = [](builder::Source<false, int> base,
+                   builder::SideSource<false, float> side,
+                   builder::Graph& graph) {
+    auto& foo = graph.AddNode("Foo");
+    base >> foo[Input<int>("BASE")];
+    side >> foo[SideInput<float>("SIDE")];
+    return foo[Output<double>("OUT")];
+  };
+  builder::Source<false, double> foo_out = foo_fn(base, side, graph);
+
+  auto bar_fn = [](builder::Source<false, double> in, builder::Graph& graph) {
+    auto& bar = graph.AddNode("Bar");
+    in >> bar[Input<double>("IN")];
+    return bar[Output<double>("OUT")];
+  };
+  builder::Source<false, double> bar_out = bar_fn(foo_out, graph);
+  bar_out.SetName("out");
+
+  bar_out >> graph[Output<double>("OUT")];
+
+  CalculatorGraphConfig expected =
+      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
+        input_stream: "IN:base"
+        input_side_packet: "SIDE:side"
+        output_stream: "OUT:out"
+        node {
+          calculator: "Foo"
+          input_stream: "BASE:base"
+          input_side_packet: "SIDE:side"
+          output_stream: "OUT:__stream_0"
+        }
+        node {
+          calculator: "Bar"
+          input_stream: "IN:__stream_0"
+          output_stream: "OUT:out"
+        }
+      )pb");
+  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
+}
+
 template <class FooT>
 void BuildGraphTypedTest() {
   builder::Graph graph;

@@ -1666,6 +1666,7 @@ TemplateParser::Parser::Parser()
       allow_partial_(false),
       allow_case_insensitive_field_(false),
       allow_unknown_field_(false),
+      allow_unknown_extension_(true),
       allow_unknown_enum_(false),
       allow_field_number_(false),
       allow_relaxed_whitespace_(false),
@@ -1683,12 +1684,11 @@ bool TemplateParser::Parser::Parse(io::ZeroCopyInputStream* input,
       allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
                                  : ParserImpl::FORBID_SINGULAR_OVERWRITES;
 
-  bool allow_unknown_extension = true;
   int recursion_limit = std::numeric_limits<int>::max();
   MediaPipeParserImpl parser(
       output->GetDescriptor(), input, error_collector_, finder_,
       parse_info_tree_, overwrites_policy, allow_case_insensitive_field_,
-      allow_unknown_field_, allow_unknown_extension, allow_unknown_enum_,
+      allow_unknown_field_, allow_unknown_extension_, allow_unknown_enum_,
       allow_field_number_, allow_relaxed_whitespace_, allow_partial_,
       recursion_limit);
   return MergeUsingImpl(input, output, &parser);
@@ -1702,13 +1702,12 @@ bool TemplateParser::Parser::ParseFromString(const std::string& input,
 
 bool TemplateParser::Parser::Merge(io::ZeroCopyInputStream* input,
                                    Message* output) {
-  bool allow_unknown_extension = true;
   int recursion_limit = std::numeric_limits<int>::max();
   MediaPipeParserImpl parser(
       output->GetDescriptor(), input, error_collector_, finder_,
       parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
       allow_case_insensitive_field_, allow_unknown_field_,
-      allow_unknown_extension, allow_unknown_enum_, allow_field_number_,
+      allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
       allow_relaxed_whitespace_, allow_partial_, recursion_limit);
   return MergeUsingImpl(input, output, &parser);
 }
@@ -1737,13 +1736,12 @@ bool TemplateParser::Parser::MergeUsingImpl(
 bool TemplateParser::Parser::ParseFieldValueFromString(
     const std::string& input, const FieldDescriptor* field, Message* output) {
   io::ArrayInputStream input_stream(input.data(), input.size());
-  bool allow_unknown_extension = true;
   int recursion_limit = std::numeric_limits<int>::max();
   ParserImpl parser(
       output->GetDescriptor(), &input_stream, error_collector_, finder_,
       parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
       allow_case_insensitive_field_, allow_unknown_field_,
-      allow_unknown_extension, allow_unknown_enum_, allow_field_number_,
+      allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
       allow_relaxed_whitespace_, allow_partial_, recursion_limit);
   return parser.ParseField(field, output);
 }

@@ -37,6 +37,10 @@ class TemplateParser {
     Parser();
     ~Parser();
 
+    void set_allow_unknown_extension(bool allow_unknown_extension) {
+      allow_unknown_extension_ = allow_unknown_extension;
+    }
+
     // Like TextFormat::Parse().
     bool Parse(proto_ns::io::ZeroCopyInputStream* input,
               proto_ns::Message* output);
@@ -99,6 +103,7 @@ class TemplateParser {
     bool allow_partial_;
     bool allow_case_insensitive_field_;
     bool allow_unknown_field_;
+    bool allow_unknown_extension_;
     bool allow_unknown_enum_;
     bool allow_field_number_;
     bool allow_relaxed_whitespace_;
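The new setter exposes a knob that the .cc previously hard-coded to true in each Parse/Merge overload. A hedged usage sketch; only set_allow_unknown_extension() and ParseFromString() come from this diff, while the namespace and the concrete output message are assumptions:

bool ParseStrict(const std::string& text,
                 mediapipe::CalculatorGraphConfig* config) {
  mediapipe::tool::TemplateParser::Parser parser;
  // Reject extensions that are not known to the descriptor pool instead of
  // silently skipping them.
  parser.set_allow_unknown_extension(false);
  return parser.ParseFromString(text, config);
}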

@@ -34,6 +34,13 @@ typedef int DimensionsPacketType[2];
 
 namespace mediapipe {
 
+constexpr char kLeftRightPaddingTag[] = "LEFT_RIGHT_PADDING";
+constexpr char kTopBottomPaddingTag[] = "TOP_BOTTOM_PADDING";
+constexpr char kOptionsTag[] = "OPTIONS";
+constexpr char kOutputDimensionsTag[] = "OUTPUT_DIMENSIONS";
+constexpr char kRotationTag[] = "ROTATION";
+constexpr char kImageTag[] = "IMAGE";
+
 using Image = mediapipe::Image;
 
 // Scales, rotates, horizontal or vertical flips the image.
@@ -102,41 +109,41 @@ REGISTER_CALCULATOR(GlScalerCalculator);
 // static
 absl::Status GlScalerCalculator::GetContract(CalculatorContract* cc) {
-  if (cc->Inputs().HasTag("IMAGE")) {
-    cc->Inputs().Tag("IMAGE").Set<Image>();
+  if (cc->Inputs().HasTag(kImageTag)) {
+    cc->Inputs().Tag(kImageTag).Set<Image>();
   } else {
     TagOrIndex(&cc->Inputs(), "VIDEO", 0).Set<GpuBuffer>();
   }
-  if (cc->Outputs().HasTag("IMAGE")) {
-    cc->Outputs().Tag("IMAGE").Set<Image>();
+  if (cc->Outputs().HasTag(kImageTag)) {
+    cc->Outputs().Tag(kImageTag).Set<Image>();
   } else {
     TagOrIndex(&cc->Outputs(), "VIDEO", 0).Set<GpuBuffer>();
   }
-  if (cc->Inputs().HasTag("ROTATION")) {
-    cc->Inputs().Tag("ROTATION").Set<int>();
+  if (cc->Inputs().HasTag(kRotationTag)) {
+    cc->Inputs().Tag(kRotationTag).Set<int>();
   }
-  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
-    cc->Inputs().Tag("OUTPUT_DIMENSIONS").Set<DimensionsPacketType>();
+  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
+    cc->Inputs().Tag(kOutputDimensionsTag).Set<DimensionsPacketType>();
   }
   MP_RETURN_IF_ERROR(GlCalculatorHelper::UpdateContract(cc));
-  if (cc->InputSidePackets().HasTag("OPTIONS")) {
-    cc->InputSidePackets().Tag("OPTIONS").Set<GlScalerCalculatorOptions>();
+  if (cc->InputSidePackets().HasTag(kOptionsTag)) {
+    cc->InputSidePackets().Tag(kOptionsTag).Set<GlScalerCalculatorOptions>();
   }
   if (HasTagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)) {
     TagOrIndex(&cc->InputSidePackets(), "OUTPUT_DIMENSIONS", 1)
         .Set<DimensionsPacketType>();
   }
-  if (cc->InputSidePackets().HasTag("ROTATION")) {
+  if (cc->InputSidePackets().HasTag(kRotationTag)) {
     // Counterclockwise rotation.
-    cc->InputSidePackets().Tag("ROTATION").Set<int>();
+    cc->InputSidePackets().Tag(kRotationTag).Set<int>();
   }
-  if (cc->Outputs().HasTag("TOP_BOTTOM_PADDING") &&
-      cc->Outputs().HasTag("LEFT_RIGHT_PADDING")) {
-    cc->Outputs().Tag("TOP_BOTTOM_PADDING").Set<float>();
-    cc->Outputs().Tag("LEFT_RIGHT_PADDING").Set<float>();
+  if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
+      cc->Outputs().HasTag(kLeftRightPaddingTag)) {
+    cc->Outputs().Tag(kTopBottomPaddingTag).Set<float>();
+    cc->Outputs().Tag(kLeftRightPaddingTag).Set<float>();
   }
   return absl::OkStatus();
 }
@@ -187,8 +194,8 @@ absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
     dst_width_ = dimensions[0];
     dst_height_ = dimensions[1];
   }
-  if (cc->InputSidePackets().HasTag("ROTATION")) {
-    rotation_ccw = cc->InputSidePackets().Tag("ROTATION").Get<int>();
+  if (cc->InputSidePackets().HasTag(kRotationTag)) {
+    rotation_ccw = cc->InputSidePackets().Tag(kRotationTag).Get<int>();
   }
   MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
@@ -197,22 +204,22 @@ absl::Status GlScalerCalculator::Open(CalculatorContext* cc) {
 }
 
 absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
-  if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
-    if (cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
+  if (cc->Inputs().HasTag(kOutputDimensionsTag)) {
+    if (cc->Inputs().Tag(kOutputDimensionsTag).IsEmpty()) {
       // OUTPUT_DIMENSIONS input stream is specified, but value is missing.
       return absl::OkStatus();
     }
 
     const auto& dimensions =
-        cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<DimensionsPacketType>();
+        cc->Inputs().Tag(kOutputDimensionsTag).Get<DimensionsPacketType>();
     dst_width_ = dimensions[0];
     dst_height_ = dimensions[1];
   }
 
   return helper_.RunInGlContext([this, cc]() -> absl::Status {
     const auto& input =
-        cc->Inputs().HasTag("IMAGE")
-            ? cc->Inputs().Tag("IMAGE").Get<Image>().GetGpuBuffer()
+        cc->Inputs().HasTag(kImageTag)
+            ? cc->Inputs().Tag(kImageTag).Get<Image>().GetGpuBuffer()
            : TagOrIndex(cc->Inputs(), "VIDEO", 0).Get<GpuBuffer>();
     QuadRenderer* renderer = nullptr;
     GlTexture src1;
@@ -254,8 +261,8 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
     RET_CHECK(renderer) << "Unsupported input texture type";
 
     // Override input side packet if ROTATION input packet is provided.
-    if (cc->Inputs().HasTag("ROTATION")) {
-      int rotation_ccw = cc->Inputs().Tag("ROTATION").Get<int>();
+    if (cc->Inputs().HasTag(kRotationTag)) {
+      int rotation_ccw = cc->Inputs().Tag(kRotationTag).Get<int>();
       MP_RETURN_IF_ERROR(FrameRotationFromInt(&rotation_, rotation_ccw));
     }
@@ -263,18 +270,18 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
     int dst_height;
     GetOutputDimensions(src1.width(), src1.height(), &dst_width, &dst_height);
-    if (cc->Outputs().HasTag("TOP_BOTTOM_PADDING") &&
-        cc->Outputs().HasTag("LEFT_RIGHT_PADDING")) {
+    if (cc->Outputs().HasTag(kTopBottomPaddingTag) &&
+        cc->Outputs().HasTag(kLeftRightPaddingTag)) {
       float top_bottom_padding;
       float left_right_padding;
       GetOutputPadding(src1.width(), src1.height(), dst_width, dst_height,
                        &top_bottom_padding, &left_right_padding);
       cc->Outputs()
-          .Tag("TOP_BOTTOM_PADDING")
+          .Tag(kTopBottomPaddingTag)
          .AddPacket(
               MakePacket<float>(top_bottom_padding).At(cc->InputTimestamp()));
       cc->Outputs()
-          .Tag("LEFT_RIGHT_PADDING")
+          .Tag(kLeftRightPaddingTag)
          .AddPacket(
               MakePacket<float>(left_right_padding).At(cc->InputTimestamp()));
     }
@@ -304,9 +311,9 @@ absl::Status GlScalerCalculator::Process(CalculatorContext* cc) {
     glFlush();
 
-    if (cc->Outputs().HasTag("IMAGE")) {
+    if (cc->Outputs().HasTag(kImageTag)) {
       auto output = dst.GetFrame<Image>();
-      cc->Outputs().Tag("IMAGE").Add(output.release(), cc->InputTimestamp());
+      cc->Outputs().Tag(kImageTag).Add(output.release(), cc->InputTimestamp());
     } else {
       auto output = dst.GetFrame<GpuBuffer>();
       TagOrIndex(&cc->Outputs(), "VIDEO", 0)

@@ -24,6 +24,7 @@ package(default_visibility = ["//visibility:public"])
 cc_library(
     name = "pose_tracking_gpu_deps",
     deps = [
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
         "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_gpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_gpu",
@@ -40,6 +41,7 @@ mediapipe_binary_graph(
 cc_library(
     name = "pose_tracking_cpu_deps",
     deps = [
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
         "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",

@@ -8,6 +8,17 @@ output_stream: "output_video"
 # Pose landmarks. (NormalizedLandmarkList)
 output_stream: "pose_landmarks"
 
+# Generates side packet to enable segmentation.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:enable_segmentation"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { bool_value: true }
+    }
+  }
+}
+
 # Throttles the images flowing downstream for flow control. It passes through
 # the very first incoming image unaltered, and waits for downstream nodes
 # (calculators and subgraphs) in the graph to finish their tasks before it
@@ -32,8 +43,10 @@ node {
 # Subgraph that detects poses and corresponding landmarks.
 node {
   calculator: "PoseLandmarkCpu"
+  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
   input_stream: "IMAGE:throttled_input_video"
   output_stream: "LANDMARKS:pose_landmarks"
+  output_stream: "SEGMENTATION_MASK:segmentation_mask"
   output_stream: "DETECTION:pose_detection"
   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
 }
@@ -43,7 +56,8 @@ node {
   calculator: "PoseRendererCpu"
   input_stream: "IMAGE:throttled_input_video"
   input_stream: "LANDMARKS:pose_landmarks"
-  input_stream: "ROI:roi_from_landmarks"
+  input_stream: "SEGMENTATION_MASK:segmentation_mask"
   input_stream: "DETECTION:pose_detection"
+  input_stream: "ROI:roi_from_landmarks"
   output_stream: "IMAGE:output_video"
 }
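The added ConstantSidePacketCalculator bakes enable_segmentation=true into the graph file itself. If that node were removed and enable_segmentation declared as an input_side_packet of the graph instead, the same flag could be supplied by the host program; a hedged sketch using the standard CalculatorGraph API:

#include "mediapipe/framework/calculator_framework.h"

absl::Status RunPoseTrackingWithSegmentation(
    const mediapipe::CalculatorGraphConfig& config) {
  mediapipe::CalculatorGraph graph;
  absl::Status status = graph.Initialize(config);
  if (!status.ok()) return status;
  // External equivalent of the constant side packet generated in the graph above.
  return graph.StartRun(
      {{"enable_segmentation", mediapipe::MakePacket<bool>(true)}});
}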

@@ -8,6 +8,17 @@ output_stream: "output_video"
 # Pose landmarks. (NormalizedLandmarkList)
 output_stream: "pose_landmarks"
 
+# Generates side packet to enable segmentation.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:enable_segmentation"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { bool_value: true }
+    }
+  }
+}
+
 # Throttles the images flowing downstream for flow control. It passes through
 # the very first incoming image unaltered, and waits for downstream nodes
 # (calculators and subgraphs) in the graph to finish their tasks before it
@@ -32,8 +43,10 @@ node {
 # Subgraph that detects poses and corresponding landmarks.
 node {
   calculator: "PoseLandmarkGpu"
+  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
   input_stream: "IMAGE:throttled_input_video"
   output_stream: "LANDMARKS:pose_landmarks"
+  output_stream: "SEGMENTATION_MASK:segmentation_mask"
   output_stream: "DETECTION:pose_detection"
   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
 }
@@ -43,7 +56,8 @@ node {
   calculator: "PoseRendererGpu"
   input_stream: "IMAGE:throttled_input_video"
   input_stream: "LANDMARKS:pose_landmarks"
-  input_stream: "ROI:roi_from_landmarks"
+  input_stream: "SEGMENTATION_MASK:segmentation_mask"
   input_stream: "DETECTION:pose_detection"
+  input_stream: "ROI:roi_from_landmarks"
   output_stream: "IMAGE:output_video"
 }

@@ -27,6 +27,7 @@ mediapipe_simple_subgraph(
     register_as = "PoseRendererGpu",
     deps = [
         "//mediapipe/calculators/core:split_landmarks_calculator",
+        "//mediapipe/calculators/image:recolor_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
         "//mediapipe/calculators/util:detections_to_render_data_calculator",
         "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
@@ -41,6 +42,7 @@ mediapipe_simple_subgraph(
     register_as = "PoseRendererCpu",
     deps = [
         "//mediapipe/calculators/core:split_landmarks_calculator",
+        "//mediapipe/calculators/image:recolor_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
         "//mediapipe/calculators/util:detections_to_render_data_calculator",
         "//mediapipe/calculators/util:landmarks_to_render_data_calculator",

Some files were not shown because too many files have changed in this diff.