diff --git a/WORKSPACE b/WORKSPACE index 7aa985d62..e77a0e79d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -331,7 +331,9 @@ load("@rules_jvm_external//:defs.bzl", "maven_install") maven_install( artifacts = [ "androidx.concurrent:concurrent-futures:1.0.0-alpha03", - "androidx.lifecycle:lifecycle-common:2.2.0", + "androidx.lifecycle:lifecycle-common:2.3.1", + "androidx.activity:activity:1.2.2", + "androidx.fragment:fragment:1.3.4", "androidx.annotation:annotation:aar:1.1.0", "androidx.appcompat:appcompat:aar:1.1.0-rc01", "androidx.camera:camera-core:1.0.0-beta10", @@ -376,9 +378,9 @@ http_archive( ) # Tensorflow repo should always go after the other external dependencies. -# 2021-06-07 -_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec" -_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126" +# 2021-07-29 +_TENSORFLOW_GIT_COMMIT = "52a2905cbc21034766c08041933053178c5d10e3" +_TENSORFLOW_SHA256 = "06d4691bcdb700f3275fa0971a1585221c2b9f3dffe867963be565a6643d7f56" http_archive( name = "org_tensorflow", urls = [ @@ -399,3 +401,18 @@ load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3") tf_workspace3() load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2") tf_workspace2() + +# Edge TPU +http_archive( + name = "libedgetpu", + sha256 = "14d5527a943a25bc648c28a9961f954f70ba4d79c0a9ca5ae226e1831d72fe80", + strip_prefix = "libedgetpu-3164995622300286ef2bb14d7fdc2792dae045b7", + urls = [ + "https://github.com/google-coral/libedgetpu/archive/3164995622300286ef2bb14d7fdc2792dae045b7.tar.gz" + ], +) +load("@libedgetpu//:workspace.bzl", "libedgetpu_dependencies") +libedgetpu_dependencies() + +load("@coral_crosstool//:configure.bzl", "cc_crosstool") +cc_crosstool(name = "crosstool") diff --git a/docs/getting_started/android.md b/docs/getting_started/android.md index 71224a258..c3c6506ee 100644 --- a/docs/getting_started/android.md +++ b/docs/getting_started/android.md @@ -16,12 +16,14 @@ nav_order: 1 Please follow instructions below to build Android example apps in the supported MediaPipe [solutions](../solutions/solutions.md). To learn more about these -example apps, start from [Hello World! on Android](./hello_world_android.md). To -incorporate MediaPipe into an existing Android Studio project, see these -[instructions](./android_archive_library.md) that use Android Archive (AAR) and -Gradle. +example apps, start from [Hello World! on Android](./hello_world_android.md). -## Building Android example apps +To incorporate MediaPipe into Android Studio projects, see these +[instructions](./android_solutions.md) to use the MediaPipe Android Solution +APIs (currently in alpha) that are now available in +[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe). + +## Building Android example apps with Bazel ### Prerequisite @@ -51,16 +53,6 @@ $YOUR_INTENDED_API_LEVEL` in android_ndk_repository() and/or android_sdk_repository() in the [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) file. -Please verify all the necessary packages are installed. - -* Android SDK Platform API Level 28 or 29 -* Android SDK Build-Tools 28 or 29 -* Android SDK Platform-Tools 28 or 29 -* Android SDK Tools 26.1.1 -* Android NDK 19c or above - -### Option 1: Build with Bazel in Command Line - Tip: You can run this [script](https://github.com/google/mediapipe/blob/master/build_android_examples.sh) to build (and install) all MediaPipe Android example apps. 
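+For a concrete feel of the command-line flow, the build-and-install steps for a
+single example look like the sketch below (the hand tracking GPU example with
+the `android_arm64` config; adjust the target and ABI for your device):
+
+```bash
+# Build the hand tracking GPU example app.
+bazel build -c opt --config=android_arm64 \
+    mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu
+
+# Install the resulting APK on a connected device.
+adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk
+```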
@@ -84,108 +76,3 @@ to build (and install) all MediaPipe Android example apps. ```bash adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/handtrackinggpu.apk ``` - -### Option 2: Build with Bazel in Android Studio - -The MediaPipe project can be imported into Android Studio using the Bazel -plugins. This allows the MediaPipe examples to be built and modified in Android -Studio. - -To incorporate MediaPipe into an existing Android Studio project, see these -[instructions](./android_archive_library.md) that use Android Archive (AAR) and -Gradle. - -The steps below use Android Studio 3.5 to build and install a MediaPipe example -app: - -1. Install and launch Android Studio 3.5. - -2. Select `Configure` -> `SDK Manager` -> `SDK Platforms`. - - * Verify that Android SDK Platform API Level 28 or 29 is installed. - * Take note of the Android SDK Location, e.g., - `/usr/local/home/Android/Sdk`. - -3. Select `Configure` -> `SDK Manager` -> `SDK Tools`. - - * Verify that Android SDK Build-Tools 28 or 29 is installed. - * Verify that Android SDK Platform-Tools 28 or 29 is installed. - * Verify that Android SDK Tools 26.1.1 is installed. - * Verify that Android NDK 19c or above is installed. - * Take note of the Android NDK Location, e.g., - `/usr/local/home/Android/Sdk/ndk-bundle` or - `/usr/local/home/Android/Sdk/ndk/20.0.5594570`. - -4. Set environment variables `$ANDROID_HOME` and `$ANDROID_NDK_HOME` to point - to the installed SDK and NDK. - - ```bash - export ANDROID_HOME=/usr/local/home/Android/Sdk - - # If the NDK libraries are installed by a previous version of Android Studio, do - export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk-bundle - # If the NDK libraries are installed by Android Studio 3.5, do - export ANDROID_NDK_HOME=/usr/local/home/Android/Sdk/ndk/ - ``` - -5. Select `Configure` -> `Plugins` to install `Bazel`. - -6. On Linux, select `File` -> `Settings` -> `Bazel settings`. On macos, select - `Android Studio` -> `Preferences` -> `Bazel settings`. Then, modify `Bazel - binary location` to be the same as the output of `$ which bazel`. - -7. Select `Import Bazel Project`. - - * Select `Workspace`: `/path/to/mediapipe` and select `Next`. - * Select `Generate from BUILD file`: `/path/to/mediapipe/BUILD` and select - `Next`. - * Modify `Project View` to be the following and select `Finish`. - - ``` - directories: - # read project settings, e.g., .bazelrc - . - -mediapipe/objc - -mediapipe/examples/ios - - targets: - //mediapipe/examples/android/...:all - //mediapipe/java/...:all - - android_sdk_platform: android-29 - - sync_flags: - --host_crosstool_top=@bazel_tools//tools/cpp:toolchain - ``` - -8. Select `Bazel` -> `Sync` -> `Sync project with Build files`. - - Note: Even after doing step 4, if you still see the error: `"no such package - '@androidsdk//': Either the path attribute of android_sdk_repository or the - ANDROID_HOME environment variable must be set."`, please modify the - [`WORKSPACE`](https://github.com/google/mediapipe/blob/master/WORKSPACE) - file to point to your SDK and NDK library locations, as below: - - ``` - android_sdk_repository( - name = "androidsdk", - path = "/path/to/android/sdk" - ) - - android_ndk_repository( - name = "androidndk", - path = "/path/to/android/ndk" - ) - ``` - -9. Connect an Android device to the workstation. - -10. Select `Run...` -> `Edit Configurations...`. - - * Select `Templates` -> `Bazel Command`. 
- * Enter Target Expression: - `//mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu:handtrackinggpu` - * Enter Bazel command: `mobile-install`. - * Enter Bazel flags: `-c opt --config=android_arm64`. - * Press the `[+]` button to add the new configuration. - * Select `Run` to run the example app on the connected Android device. diff --git a/docs/getting_started/android_archive_library.md b/docs/getting_started/android_archive_library.md index bb7125243..d2f25213f 100644 --- a/docs/getting_started/android_archive_library.md +++ b/docs/getting_started/android_archive_library.md @@ -3,7 +3,7 @@ layout: default title: MediaPipe Android Archive parent: MediaPipe on Android grand_parent: Getting Started -nav_order: 2 +nav_order: 3 --- # MediaPipe Android Archive diff --git a/docs/getting_started/android_solutions.md b/docs/getting_started/android_solutions.md new file mode 100644 index 000000000..de7135c18 --- /dev/null +++ b/docs/getting_started/android_solutions.md @@ -0,0 +1,79 @@ +--- +layout: default +title: Android Solutions +parent: MediaPipe on Android +grand_parent: Getting Started +nav_order: 2 +--- + +# Android Solution APIs +{: .no_toc } + +1. TOC +{:toc} +--- + +Please follow instructions below to use the MediaPipe Solution APIs in Android +Studio projects and build the Android example apps in the supported MediaPipe +[solutions](../solutions/solutions.md). + +## Integrate MediaPipe Android Solutions in Android Studio + +MediaPipe Android Solution APIs (currently in alpha) are now available in +[Google's Maven Repository](https://maven.google.com/web/index.html?#com.google.mediapipe). +To incorporate MediaPipe Android Solutions into an Android Studio project, add +the following into the project's Gradle dependencies: + +``` +dependencies { + // MediaPipe solution-core is the foundation of any MediaPipe solutions. + implementation 'com.google.mediapipe:solution-core:latest.release' + // Optional: MediaPipe Hands solution. + implementation 'com.google.mediapipe:hands:latest.release' + // Optional: MediaPipe FaceMesh solution. + implementation 'com.google.mediapipe:facemesh:latest.release' + // MediaPipe deps + implementation 'com.google.flogger:flogger:latest.release' + implementation 'com.google.flogger:flogger-system-backend:latest.release' + implementation 'com.google.guava:guava:27.0.1-android' + implementation 'com.google.protobuf:protobuf-java:3.11.4' + // CameraX core library + def camerax_version = "1.0.0-beta10" + implementation "androidx.camera:camera-core:$camerax_version" + implementation "androidx.camera:camera-camera2:$camerax_version" + implementation "androidx.camera:camera-lifecycle:$camerax_version" +} +``` + +See the detailed solutions API usage examples for different use cases in the +solution example apps' +[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions). +If the prebuilt maven packages are not sufficient, building the MediaPipe +Android archive library locally by following these +[instructions](./android_archive_library.md). + +## Build solution example apps in Android Studio + +1. Open Android Studio Arctic Fox on Linux, macOS, or Windows. + +2. Import mediapipe/examples/android/solutions directory into Android Studio. + + ![Screenshot](../images/import_mp_android_studio_project.png) + +3. For Windows users, run `create_win_symlinks.bat` as administrator to create + res directory symlinks. + + ![Screenshot](../images/run_create_win_symlinks.png) + +4. 
Select "File" -> "Sync Project with Gradle Files" to sync project. + +5. Run solution example app in Android Studio. + + ![Screenshot](../images/run_android_solution_app.png) + +6. (Optional) Run solutions on CPU. + + MediaPipe solution example apps run the pipeline and the model inference on + GPU by default. If needed, for example to run the apps on Android Emulator, + set the `RUN_ON_GPU` boolean variable to `false` in the app's + MainActivity.java to run the pipeline and the model inference on CPU. diff --git a/docs/getting_started/install.md b/docs/getting_started/install.md index 95dce1d17..b90c0f3bd 100644 --- a/docs/getting_started/install.md +++ b/docs/getting_started/install.md @@ -43,104 +43,189 @@ install --user six`. 3. Install OpenCV and FFmpeg. - Option 1. Use package manager tool to install the pre-compiled OpenCV - libraries. FFmpeg will be installed via libopencv-video-dev. + **Option 1**. Use package manager tool to install the pre-compiled OpenCV + libraries. FFmpeg will be installed via `libopencv-video-dev`. - Note: Debian 9 and Ubuntu 16.04 provide OpenCV 2.4.9. You may want to take - option 2 or 3 to install OpenCV 3 or above. + OS | OpenCV + -------------------- | ------ + Debian 9 (stretch) | 2.4 + Debian 10 (buster) | 3.2 + Debian 11 (bullseye) | 4.5 + Ubuntu 16.04 LTS | 2.4 + Ubuntu 18.04 LTS | 3.2 + Ubuntu 20.04 LTS | 4.2 + Ubuntu 20.04 LTS | 4.2 + Ubuntu 21.04 | 4.5 ```bash - $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \ - libopencv-calib3d-dev libopencv-features2d-dev \ - libopencv-imgproc-dev libopencv-video-dev + $ sudo apt-get install -y \ + libopencv-core-dev \ + libopencv-highgui-dev \ + libopencv-calib3d-dev \ + libopencv-features2d-dev \ + libopencv-imgproc-dev \ + libopencv-video-dev ``` - Debian 9 and Ubuntu 18.04 install the packages in - `/usr/lib/x86_64-linux-gnu`. MediaPipe's [`opencv_linux.BUILD`] and - [`ffmpeg_linux.BUILD`] are configured for this library path. Ubuntu 20.04 - may install the OpenCV and FFmpeg packages in `/usr/local`, Please follow - the option 3 below to modify the [`WORKSPACE`], [`opencv_linux.BUILD`] and - [`ffmpeg_linux.BUILD`] files accordingly. - - Moreover, for Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, the - library path needs to be modified like the following: + MediaPipe's [`opencv_linux.BUILD`] and [`WORKSPACE`] are already configured + for OpenCV 2/3 and should work correctly on any architecture: ```bash - sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD + # WORKSPACE + new_local_repository( + name = "linux_opencv", + build_file = "@//third_party:opencv_linux.BUILD", + path = "/usr", + ) + + # opencv_linux.BUILD for OpenCV 2/3 installed from Debian package + cc_library( + name = "opencv", + linkopts = [ + "-l:libopencv_core.so", + "-l:libopencv_calib3d.so", + "-l:libopencv_features2d.so", + "-l:libopencv_highgui.so", + "-l:libopencv_imgcodecs.so", + "-l:libopencv_imgproc.so", + "-l:libopencv_video.so", + "-l:libopencv_videoio.so", + ], + ) ``` - Option 2. Run [`setup_opencv.sh`] to automatically build OpenCV from source - and modify MediaPipe's OpenCV config. + For OpenCV 4 you need to modify [`opencv_linux.BUILD`] taking into account + current architecture: - Option 3. 
Follow OpenCV's + ```bash + # WORKSPACE + new_local_repository( + name = "linux_opencv", + build_file = "@//third_party:opencv_linux.BUILD", + path = "/usr", + ) + + # opencv_linux.BUILD for OpenCV 4 installed from Debian package + cc_library( + name = "opencv", + hdrs = glob([ + # Uncomment according to your multiarch value (gcc -print-multiarch): + # "include/aarch64-linux-gnu/opencv4/opencv2/cvconfig.h", + # "include/arm-linux-gnueabihf/opencv4/opencv2/cvconfig.h", + # "include/x86_64-linux-gnu/opencv4/opencv2/cvconfig.h", + "include/opencv4/opencv2/**/*.h*", + ]), + includes = [ + # Uncomment according to your multiarch value (gcc -print-multiarch): + # "include/aarch64-linux-gnu/opencv4/", + # "include/arm-linux-gnueabihf/opencv4/", + # "include/x86_64-linux-gnu/opencv4/", + "include/opencv4/", + ], + linkopts = [ + "-l:libopencv_core.so", + "-l:libopencv_calib3d.so", + "-l:libopencv_features2d.so", + "-l:libopencv_highgui.so", + "-l:libopencv_imgcodecs.so", + "-l:libopencv_imgproc.so", + "-l:libopencv_video.so", + "-l:libopencv_videoio.so", + ], + ) + ``` + + **Option 2**. Run [`setup_opencv.sh`] to automatically build OpenCV from + source and modify MediaPipe's OpenCV config. This option will do all steps + defined in Option 3 automatically. + + **Option 3**. Follow OpenCV's [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) to manually build OpenCV from source code. - Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and - [`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg - libraries. For example if OpenCV and FFmpeg are both manually installed in - "/usr/local/", you will need to update: (1) the "linux_opencv" and - "linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv" - cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg" - cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below: + You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point + MediaPipe to your own OpenCV libraries. Assume OpenCV would be installed to + `/usr/local/` which is recommended by default. 
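+    Before picking one of the configurations below, it can help to check which
+    multiarch triple, install prefix, and OpenCV series are present on the
+    machine. A quick sketch (the `pkg-config` query only works if the OpenCV
+    build generated its `.pc` files):
+
+    ```bash
+    # Multiarch value referenced in the BUILD comments above:
+    gcc -print-multiarch
+    # Check whether the self-built OpenCV landed in /usr/local:
+    ls /usr/local/lib/libopencv_core.so* /usr/local/include/opencv4
+    # Report the installed OpenCV version, if a .pc file was generated:
+    pkg-config --modversion opencv4
+    ```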
+ + OpenCV 2/3 setup: ```bash + # WORKSPACE new_local_repository( - name = "linux_opencv", - build_file = "@//third_party:opencv_linux.BUILD", - path = "/usr/local", + name = "linux_opencv", + build_file = "@//third_party:opencv_linux.BUILD", + path = "/usr/local", ) + # opencv_linux.BUILD for OpenCV 2/3 installed to /usr/local + cc_library( + name = "opencv", + linkopts = [ + "-L/usr/local/lib", + "-l:libopencv_core.so", + "-l:libopencv_calib3d.so", + "-l:libopencv_features2d.so", + "-l:libopencv_highgui.so", + "-l:libopencv_imgcodecs.so", + "-l:libopencv_imgproc.so", + "-l:libopencv_video.so", + "-l:libopencv_videoio.so", + ], + ) + ``` + + OpenCV 4 setup: + + ```bash + # WORKSPACE new_local_repository( - name = "linux_ffmpeg", - build_file = "@//third_party:ffmpeg_linux.BUILD", - path = "/usr/local", + name = "linux_opencv", + build_file = "@//third_party:opencv_linux.BUILD", + path = "/usr/local", ) + # opencv_linux.BUILD for OpenCV 4 installed to /usr/local cc_library( - name = "opencv", - srcs = glob( - [ - "lib/libopencv_core.so", - "lib/libopencv_highgui.so", - "lib/libopencv_imgcodecs.so", - "lib/libopencv_imgproc.so", - "lib/libopencv_video.so", - "lib/libopencv_videoio.so", - ], - ), - hdrs = glob([ - # For OpenCV 3.x - "include/opencv2/**/*.h*", - # For OpenCV 4.x - # "include/opencv4/opencv2/**/*.h*", - ]), - includes = [ - # For OpenCV 3.x - "include/", - # For OpenCV 4.x - # "include/opencv4/", - ], - linkstatic = 1, - visibility = ["//visibility:public"], + name = "opencv", + hdrs = glob([ + "include/opencv4/opencv2/**/*.h*", + ]), + includes = [ + "include/opencv4/", + ], + linkopts = [ + "-L/usr/local/lib", + "-l:libopencv_core.so", + "-l:libopencv_calib3d.so", + "-l:libopencv_features2d.so", + "-l:libopencv_highgui.so", + "-l:libopencv_imgcodecs.so", + "-l:libopencv_imgproc.so", + "-l:libopencv_video.so", + "-l:libopencv_videoio.so", + ], + ) + ``` + + Current FFmpeg setup is defined in [`ffmpeg_linux.BUILD`] and should work + for any architecture: + + ```bash + # WORKSPACE + new_local_repository( + name = "linux_ffmpeg", + build_file = "@//third_party:ffmpeg_linux.BUILD", + path = "/usr" ) + # ffmpeg_linux.BUILD for FFmpeg installed from Debian package cc_library( - name = "libffmpeg", - srcs = glob( - [ - "lib/libav*.so", - ], - ), - hdrs = glob(["include/libav*/*.h"]), - includes = ["include"], - linkopts = [ - "-lavcodec", - "-lavformat", - "-lavutil", - ], - linkstatic = 1, - visibility = ["//visibility:public"], + name = "libffmpeg", + linkopts = [ + "-l:libavcodec.so", + "-l:libavformat.so", + "-l:libavutil.so", + ], ) ``` diff --git a/docs/getting_started/javascript.md b/docs/getting_started/javascript.md index 5cae5cbd4..f56abcd6e 100644 --- a/docs/getting_started/javascript.md +++ b/docs/getting_started/javascript.md @@ -29,6 +29,16 @@ Solution | NPM Package | Example Click on a solution link above for more information, including API and code snippets. +### Supported plaforms: + +| Browser | Platform | Notes | +| ------- | ----------------------- | -------------------------------------- | +| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuschia | +| | | unsupported. | +| Chrome | iOS | Camera unavailable in Chrome on iOS. | +| Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / | +| | | MacBook | + The quickest way to get acclimated is to look at the examples above. Each demo has a link to a [CodePen][codepen] so that you can edit the code and try it yourself. 
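+As a minimal sketch of what these per-solution APIs look like in the browser
+(modeled on the Pose snippet added to the Pose documentation in this change;
+the CDN URL, option values, and `videoElement` are illustrative assumptions):
+
+```javascript
+// @mediapipe/hands and @mediapipe/camera_utils are assumed to be loaded.
+const hands = new Hands({locateFile: (file) =>
+    `https://cdn.jsdelivr.net/npm/@mediapipe/hands/${file}`});
+hands.setOptions({maxNumHands: 2, minDetectionConfidence: 0.5});
+hands.onResults((results) => {
+  // One landmark list per detected hand.
+  console.log(results.multiHandLandmarks);
+});
+
+// camera_utils feeds webcam frames into the solution.
+const camera = new Camera(videoElement, {
+  onFrame: async () => { await hands.send({image: videoElement}); },
+  width: 1280,
+  height: 720
+});
+camera.start();
+```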
We have included a number of utility packages to help you get started: diff --git a/docs/images/import_mp_android_studio_project.png b/docs/images/import_mp_android_studio_project.png new file mode 100644 index 000000000..aa02b95ce Binary files /dev/null and b/docs/images/import_mp_android_studio_project.png differ diff --git a/docs/images/mobile/pose_segmentation.mp4 b/docs/images/mobile/pose_segmentation.mp4 new file mode 100644 index 000000000..e0a68da70 Binary files /dev/null and b/docs/images/mobile/pose_segmentation.mp4 differ diff --git a/docs/images/run_android_solution_app.png b/docs/images/run_android_solution_app.png new file mode 100644 index 000000000..aa21f3c24 Binary files /dev/null and b/docs/images/run_android_solution_app.png differ diff --git a/docs/images/run_create_win_symlinks.png b/docs/images/run_create_win_symlinks.png new file mode 100644 index 000000000..69b94b75f Binary files /dev/null and b/docs/images/run_create_win_symlinks.png differ diff --git a/docs/solutions/face_mesh.md b/docs/solutions/face_mesh.md index 5de1b41d3..a94785324 100644 --- a/docs/solutions/face_mesh.md +++ b/docs/solutions/face_mesh.md @@ -278,6 +278,7 @@ Supported configuration options: import cv2 import mediapipe as mp mp_drawing = mp.solutions.drawing_utils +mp_drawing_styles = mp.solutions.drawing_styles mp_face_mesh = mp.solutions.face_mesh # For static images: @@ -301,9 +302,17 @@ with mp_face_mesh.FaceMesh( mp_drawing.draw_landmarks( image=annotated_image, landmark_list=face_landmarks, - connections=mp_face_mesh.FACE_CONNECTIONS, - landmark_drawing_spec=drawing_spec, - connection_drawing_spec=drawing_spec) + connections=mp_face_mesh.FACEMESH_TESSELATION, + landmark_drawing_spec=None, + connection_drawing_spec=mp_drawing_styles + .get_default_face_mesh_tesselation_style()) + mp_drawing.draw_landmarks( + image=annotated_image, + landmark_list=face_landmarks, + connections=mp_face_mesh.FACEMESH_CONTOURS, + landmark_drawing_spec=None, + connection_drawing_spec=mp_drawing_styles + .get_default_face_mesh_contours_style()) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) # For webcam input: @@ -335,9 +344,17 @@ with mp_face_mesh.FaceMesh( mp_drawing.draw_landmarks( image=image, landmark_list=face_landmarks, - connections=mp_face_mesh.FACE_CONNECTIONS, - landmark_drawing_spec=drawing_spec, - connection_drawing_spec=drawing_spec) + connections=mp_face_mesh.FACEMESH_TESSELATION, + landmark_drawing_spec=None, + connection_drawing_spec=mp_drawing_styles + .get_default_face_mesh_tesselation_style()) + mp_drawing.draw_landmarks( + image=image, + landmark_list=face_landmarks, + connections=mp_face_mesh.FACEMESH_CONTOURS, + landmark_drawing_spec=None, + connection_drawing_spec=mp_drawing_styles + .get_default_face_mesh_contours_style()) cv2.imshow('MediaPipe FaceMesh', image) if cv2.waitKey(5) & 0xFF == 27: break @@ -423,6 +440,200 @@ camera.start(); ``` +### Android Solution API + +Please first follow general +[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api) +to add MediaPipe Gradle dependencies, then try the FaceMash solution API in the +companion +[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh) +following +[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio) +and learn more in the usage example below. 
+
+Supported configuration options:
+
+*   [staticImageMode](#static_image_mode)
+*   [maxNumFaces](#max_num_faces)
+*   runOnGpu: Run the pipeline and the model inference on GPU or CPU.
+
+#### Camera Input
+
+```java
+// For camera input and result rendering with OpenGL.
+FaceMeshOptions faceMeshOptions =
+    FaceMeshOptions.builder()
+        .setMode(FaceMeshOptions.STREAMING_MODE)  // API soon to become
+        .setMaxNumFaces(1)                        // setStaticImageMode(false)
+        .setRunOnGpu(true).build();
+FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+facemesh.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+
+// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
+CameraInput cameraInput = new CameraInput(this);
+cameraInput.setNewFrameListener(
+    textureFrame -> facemesh.send(textureFrame));
+
+// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
+// that provides the interfaces to run user-defined OpenGL rendering code.
+// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
+// as an example.
+SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
+    new SolutionGlSurfaceView<>(
+        this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
+glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
+glSurfaceView.setRenderInputImage(true);
+
+facemesh.setResultListener(
+    faceMeshResult -> {
+      NormalizedLandmark noseLandmark =
+          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              noseLandmark.getX(), noseLandmark.getY()));
+      // Request GL rendering.
+      glSurfaceView.setRenderData(faceMeshResult);
+      glSurfaceView.requestRender();
+    });
+
+// The runnable to start camera after the GLSurfaceView is attached.
+glSurfaceView.post(
+    () ->
+        cameraInput.start(
+            this,
+            facemesh.getGlContext(),
+            CameraInput.CameraFacing.FRONT,
+            glSurfaceView.getWidth(),
+            glSurfaceView.getHeight()));
+```
+
+#### Image Input
+
+```java
+// For reading images from gallery and drawing the output in an ImageView.
+FaceMeshOptions faceMeshOptions =
+    FaceMeshOptions.builder()
+        .setMode(FaceMeshOptions.STATIC_IMAGE_MODE)  // API soon to become
+        .setMaxNumFaces(1)                           // setStaticImageMode(true)
+        .setRunOnGpu(true).build();
+FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+
+// Connects MediaPipe FaceMesh to the user-defined ImageView instance that allows
+// users to have the custom drawing of the output landmarks on it.
+// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
+// as an example.
+FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
+facemesh.setResultListener(
+    faceMeshResult -> {
+      int width = faceMeshResult.inputBitmap().getWidth();
+      int height = faceMeshResult.inputBitmap().getHeight();
+      NormalizedLandmark noseLandmark =
+          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
+              noseLandmark.getX() * width, noseLandmark.getY() * height));
+      // Request canvas drawing.
+      imageView.setFaceMeshResult(faceMeshResult);
+      runOnUiThread(() -> imageView.update());
+    });
+facemesh.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+
+// ActivityResultLauncher to get an image from the gallery as Bitmap.
+ActivityResultLauncher<Intent> imageGetter =
+    registerForActivityResult(
+        new ActivityResultContracts.StartActivityForResult(),
+        result -> {
+          Intent resultIntent = result.getData();
+          if (resultIntent != null && result.getResultCode() == RESULT_OK) {
+            Bitmap bitmap = null;
+            try {
+              bitmap =
+                  MediaStore.Images.Media.getBitmap(
+                      this.getContentResolver(), resultIntent.getData());
+            } catch (IOException e) {
+              Log.e(TAG, "Bitmap reading error:" + e);
+            }
+            if (bitmap != null) {
+              facemesh.send(bitmap);
+            }
+          }
+        });
+Intent gallery = new Intent(
+    Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
+imageGetter.launch(gallery);
+```
+
+#### Video Input
+
+```java
+// For video input and result rendering with OpenGL.
+FaceMeshOptions faceMeshOptions =
+    FaceMeshOptions.builder()
+        .setMode(FaceMeshOptions.STREAMING_MODE)  // API soon to become
+        .setMaxNumFaces(1)                        // setStaticImageMode(false)
+        .setRunOnGpu(true).build();
+FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+facemesh.setErrorListener(
+    (message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+
+// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
+VideoInput videoInput = new VideoInput(this);
+videoInput.setNewFrameListener(
+    textureFrame -> facemesh.send(textureFrame));
+
+// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
+// that provides the interfaces to run user-defined OpenGL rendering code.
+// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java
+// as an example.
+SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
+    new SolutionGlSurfaceView<>(
+        this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
+glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
+glSurfaceView.setRenderInputImage(true);
+
+facemesh.setResultListener(
+    faceMeshResult -> {
+      NormalizedLandmark noseLandmark =
+          faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
+      Log.i(
+          TAG,
+          String.format(
+              "MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+              noseLandmark.getX(), noseLandmark.getY()));
+      // Request GL rendering.
+ glSurfaceView.setRenderData(faceMeshResult); + glSurfaceView.requestRender(); + }); + +ActivityResultLauncher videoGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + glSurfaceView.post( + () -> + videoInput.start( + this, + resultIntent.getData(), + facemesh.getGlContext(), + glSurfaceView.getWidth(), + glSurfaceView.getHeight())); + } + } + }); +Intent gallery = + new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI); +videoGetter.launch(gallery); +``` + ## Example Apps Please first see general instructions for diff --git a/docs/solutions/hands.md b/docs/solutions/hands.md index 9c662912e..c3088d64c 100644 --- a/docs/solutions/hands.md +++ b/docs/solutions/hands.md @@ -219,8 +219,8 @@ Supported configuration options: import cv2 import mediapipe as mp mp_drawing = mp.solutions.drawing_utils +mp_drawing_styles = mp.solutions.drawing_styles mp_hands = mp.solutions.hands -drawing_styles = mp.solutions.drawing_styles # For static images: IMAGE_FILES = [] @@ -249,9 +249,11 @@ with mp_hands.Hands( f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})' ) mp_drawing.draw_landmarks( - annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS, - drawing_styles.get_default_hand_landmark_style(), - drawing_styles.get_default_hand_connection_style()) + annotated_image, + hand_landmarks, + mp_hands.HAND_CONNECTIONS, + mp_drawing_styles.get_default_hand_landmarks_style(), + mp_drawing_styles.get_default_hand_connections_style()) cv2.imwrite( '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1)) @@ -281,9 +283,11 @@ with mp_hands.Hands( if results.multi_hand_landmarks: for hand_landmarks in results.multi_hand_landmarks: mp_drawing.draw_landmarks( - image, hand_landmarks, mp_hands.HAND_CONNECTIONS, - drawing_styles.get_default_hand_landmark_style(), - drawing_styles.get_default_hand_connection_style()) + image, + hand_landmarks, + mp_hands.HAND_CONNECTIONS, + mp_drawing_styles.get_default_hand_landmarks_style(), + mp_drawing_styles.get_default_hand_connections_style()) cv2.imshow('MediaPipe Hands', image) if cv2.waitKey(5) & 0xFF == 27: break @@ -364,6 +368,200 @@ camera.start(); ``` +### Android Solution API + +Please first follow general +[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api) +to add MediaPipe Gradle dependencies, then try the Hands solution API in the +companion +[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands) +following +[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio) +and learn more in usage example below. + +Supported configuration options: + +* [staticImageMode](#static_image_mode) +* [maxNumHands](#max_num_hands) +* runOnGpu: Run the pipeline and the model inference on GPU or CPU. + +#### Camera Input + +```java +// For camera input and result rendering with OpenGL. 
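+// Overall flow: configure HandsOptions, create the Hands solution object, feed
+// camera frames in via CameraInput, and render each HandsResult with a
+// SolutionGlSurfaceView. Hands.getHandLandmark() is a convenience accessor for
+// a single landmark of the specified hand.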
+HandsOptions handsOptions = + HandsOptions.builder() + .setMode(HandsOptions.STREAMING_MODE) // API soon to become + .setMaxNumHands(1) // setStaticImageMode(false) + .setRunOnGpu(true).build(); +Hands hands = new Hands(this, handsOptions); +hands.setErrorListener( + (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message)); + +// Initializes a new CameraInput instance and connects it to MediaPipe Hands. +CameraInput cameraInput = new CameraInput(this); +cameraInput.setNewFrameListener( + textureFrame -> hands.send(textureFrame)); + +// Initializes a new GlSurfaceView with a ResultGlRenderer instance +// that provides the interfaces to run user-defined OpenGL rendering code. +// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java +// as an example. +SolutionGlSurfaceView glSurfaceView = + new SolutionGlSurfaceView<>( + this, hands.getGlContext(), hands.getGlMajorVersion()); +glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer()); +glSurfaceView.setRenderInputImage(true); + +hands.setResultListener( + handsResult -> { + NormalizedLandmark wristLandmark = Hands.getHandLandmark( + handsResult, 0, HandLandmark.WRIST); + Log.i( + TAG, + String.format( + "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f", + wristLandmark.getX(), wristLandmark.getY())); + // Request GL rendering. + glSurfaceView.setRenderData(handsResult); + glSurfaceView.requestRender(); + }); + +// The runnable to start camera after the GLSurfaceView is attached. +glSurfaceView.post( + () -> + cameraInput.start( + this, + hands.getGlContext(), + CameraInput.CameraFacing.FRONT, + glSurfaceView.getWidth(), + glSurfaceView.getHeight())); +``` + +#### Image Input + +```java +// For reading images from gallery and drawing the output in an ImageView. +HandsOptions handsOptions = + HandsOptions.builder() + .setMode(HandsOptions.STATIC_IMAGE_MODE) // API soon to become + .setMaxNumHands(1) // setStaticImageMode(true) + .setRunOnGpu(true).build(); +Hands hands = new Hands(this, handsOptions); + +// Connects MediaPipe Hands to the user-defined ImageView instance that allows +// users to have the custom drawing of the output landmarks on it. +// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java +// as an example. +HandsResultImageView imageView = new HandsResultImageView(this); +hands.setResultListener( + handsResult -> { + int width = handsResult.inputBitmap().getWidth(); + int height = handsResult.inputBitmap().getHeight(); + NormalizedLandmark wristLandmark = Hands.getHandLandmark( + handsResult, 0, HandLandmark.WRIST); + Log.i( + TAG, + String.format( + "MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f", + wristLandmark.getX() * width, wristLandmark.getY() * height)); + // Request canvas drawing. + imageView.setHandsResult(handsResult); + runOnUiThread(() -> imageView.update()); + }); +hands.setErrorListener( + (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message)); + +// ActivityResultLauncher to get an image from the gallery as Bitmap. 
+ActivityResultLauncher imageGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null && result.getResultCode() == RESULT_OK) { + Bitmap bitmap = null; + try { + bitmap = + MediaStore.Images.Media.getBitmap( + this.getContentResolver(), resultIntent.getData()); + } catch (IOException e) { + Log.e(TAG, "Bitmap reading error:" + e); + } + if (bitmap != null) { + hands.send(bitmap); + } + } + }); +Intent gallery = new Intent( + Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI); +imageGetter.launch(gallery); +``` + +#### Video Input + +```java +// For video input and result rendering with OpenGL. +HandsOptions handsOptions = + HandsOptions.builder() + .setMode(HandsOptions.STREAMING_MODE) // API soon to become + .setMaxNumHands(1) // setStaticImageMode(false) + .setRunOnGpu(true).build(); +Hands hands = new Hands(this, handsOptions); +hands.setErrorListener( + (message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message)); + +// Initializes a new VideoInput instance and connects it to MediaPipe Hands. +VideoInput videoInput = new VideoInput(this); +videoInput.setNewFrameListener( + textureFrame -> hands.send(textureFrame)); + +// Initializes a new GlSurfaceView with a ResultGlRenderer instance +// that provides the interfaces to run user-defined OpenGL rendering code. +// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java +// as an example. +SolutionGlSurfaceView glSurfaceView = + new SolutionGlSurfaceView<>( + this, hands.getGlContext(), hands.getGlMajorVersion()); +glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer()); +glSurfaceView.setRenderInputImage(true); + +hands.setResultListener( + handsResult -> { + NormalizedLandmark wristLandmark = Hands.getHandLandmark( + handsResult, 0, HandLandmark.WRIST); + Log.i( + TAG, + String.format( + "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f", + wristLandmark.getX(), wristLandmark.getY())); + // Request GL rendering. + glSurfaceView.setRenderData(handsResult); + glSurfaceView.requestRender(); + }); + +ActivityResultLauncher videoGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + glSurfaceView.post( + () -> + videoInput.start( + this, + resultIntent.getData(), + hands.getGlContext(), + glSurfaceView.getWidth(), + glSurfaceView.getHeight())); + } + } + }); +Intent gallery = + new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI); +videoGetter.launch(gallery); +``` + ## Example Apps Please first see general instructions for diff --git a/docs/solutions/holistic.md b/docs/solutions/holistic.md index 1ae8034bf..0532a33dd 100644 --- a/docs/solutions/holistic.md +++ b/docs/solutions/holistic.md @@ -225,6 +225,7 @@ Supported configuration options: import cv2 import mediapipe as mp mp_drawing = mp.solutions.drawing_utils +mp_drawing_styles = mp.solutions.drawing_styles mp_holistic = mp.solutions.holistic # For static images: @@ -247,13 +248,18 @@ with mp_holistic.Holistic( # Draw pose, left and right hands, and face landmarks on the image. 
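+    # (Left/right hand landmarks are also available as
+    #  results.left_hand_landmarks and results.right_hand_landmarks and can be
+    #  drawn with mp_holistic.HAND_CONNECTIONS in the same way as the pose
+    #  landmarks below.)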
annotated_image = image.copy() mp_drawing.draw_landmarks( - annotated_image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) + annotated_image, + results.face_landmarks, + mp_holistic.FACEMESH_TESSELATION, + landmark_drawing_spec=None, + connection_drawing_spec=mp_drawing_styles + .get_default_face_mesh_tesselation_style()) mp_drawing.draw_landmarks( - annotated_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) - mp_drawing.draw_landmarks( - annotated_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) - mp_drawing.draw_landmarks( - annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) + annotated_image, + results.pose_landmarks, + mp_holistic.POSE_CONNECTIONS, + landmark_drawing_spec=mp_drawing_styles. + get_default_pose_landmarks_style()) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) # Plot pose world landmarks. mp_drawing.plot_landmarks( @@ -283,13 +289,18 @@ with mp_holistic.Holistic( image.flags.writeable = True image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) mp_drawing.draw_landmarks( - image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) + image, + results.face_landmarks, + mp_holistic.FACEMESH_CONTOURS, + landmark_drawing_spec=None, + connection_drawing_spec=mp_drawing_styles + .get_default_face_mesh_contours_style()) mp_drawing.draw_landmarks( - image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) - mp_drawing.draw_landmarks( - image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) - mp_drawing.draw_landmarks( - image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) + image, + results.pose_landmarks, + mp_holistic.POSE_CONNECTIONS, + landmark_drawing_spec=mp_drawing_styles + .get_default_pose_landmarks_style()) cv2.imshow('MediaPipe Holistic', image) if cv2.waitKey(5) & 0xFF == 27: break diff --git a/docs/solutions/pose.md b/docs/solutions/pose.md index 0ae81a858..696f71943 100644 --- a/docs/solutions/pose.md +++ b/docs/solutions/pose.md @@ -30,7 +30,8 @@ overlay of digital content and information on top of the physical world in augmented reality. MediaPipe Pose is a ML solution for high-fidelity body pose tracking, inferring -33 3D landmarks on the whole body from RGB video frames utilizing our +33 3D landmarks and background segmentation mask on the whole body from RGB +video frames utilizing our [BlazePose](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html) research that also powers the [ML Kit Pose Detection API](https://developers.google.com/ml-kit/vision/pose-detection). @@ -49,11 +50,11 @@ The solution utilizes a two-step detector-tracker ML pipeline, proven to be effective in our [MediaPipe Hands](./hands.md) and [MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline first locates the person/pose region-of-interest (ROI) within the frame. The -tracker subsequently predicts the pose landmarks within the ROI using the -ROI-cropped frame as input. Note that for video use cases the detector is -invoked only as needed, i.e., for the very first frame and when the tracker -could no longer identify body pose presence in the previous frame. For other -frames the pipeline simply derives the ROI from the previous frame’s pose +tracker subsequently predicts the pose landmarks and segmentation mask within +the ROI using the ROI-cropped frame as input. 
Note that for video use cases the +detector is invoked only as needed, i.e., for the very first frame and when the +tracker could no longer identify body pose presence in the previous frame. For +other frames the pipeline simply derives the ROI from the previous frame’s pose landmarks. The pipeline is implemented as a MediaPipe @@ -129,16 +130,19 @@ hip midpoints. The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks (see figure below). -Please find more detail in the -[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html), -this [paper](https://arxiv.org/abs/2006.10204) and -[the model card](./models.md#pose), and the attributes in each landmark -[below](#pose_landmarks). - ![pose_tracking_full_body_landmarks.png](../images/mobile/pose_tracking_full_body_landmarks.png) | :----------------------------------------------------------------------------------------------: | *Fig 4. 33 pose landmarks.* | +Optionally, MediaPipe Pose can predicts a full-body +[segmentation mask](#segmentation_mask) represented as a two-class segmentation +(human or background). + +Please find more detail in the +[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html), +this [paper](https://arxiv.org/abs/2006.10204), +[the model card](./models.md#pose) and the [Output](#Output) section below. + ## Solution APIs ### Cross-platform Configuration Options @@ -167,6 +171,18 @@ If set to `true`, the solution filters pose landmarks across different input images to reduce jitter, but ignored if [static_image_mode](#static_image_mode) is also set to `true`. Default to `true`. +#### enable_segmentation + +If set to `true`, in addition to the pose landmarks the solution also generates +the segmentation mask. Default to `false`. + +#### smooth_segmentation + +If set to `true`, the solution filters segmentation masks across different input +images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation) +is `false` or [static_image_mode](#static_image_mode) is `true`. Default to +`true`. + #### min_detection_confidence Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the @@ -211,6 +227,19 @@ the following: * `visibility`: Identical to that defined in the corresponding [pose_landmarks](#pose_landmarks). +#### segmentation_mask + +The output segmentation mask, predicted only when +[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the +same width and height as the input image, and contains values in `[0.0, 1.0]` +where `1.0` and `0.0` indicate high certainty of a "human" and "background" +pixel respectively. Please refer to the platform-specific usage examples below +for usage details. + +*Fig 6. 
Example of MediaPipe Pose segmentation mask.* | +:-----------------------------------------------------------: | + | + ### Python Solution API Please first follow general [instructions](../getting_started/python.md) to @@ -222,6 +251,8 @@ Supported configuration options: * [static_image_mode](#static_image_mode) * [model_complexity](#model_complexity) * [smooth_landmarks](#smooth_landmarks) +* [enable_segmentation](#enable_segmentation) +* [smooth_segmentation](#smooth_segmentation) * [min_detection_confidence](#min_detection_confidence) * [min_tracking_confidence](#min_tracking_confidence) @@ -229,13 +260,16 @@ Supported configuration options: import cv2 import mediapipe as mp mp_drawing = mp.solutions.drawing_utils +mp_drawing_styles = mp.solutions.drawing_styles mp_pose = mp.solutions.pose # For static images: IMAGE_FILES = [] +BG_COLOR = (192, 192, 192) # gray with mp_pose.Pose( static_image_mode=True, model_complexity=2, + enable_segmentation=True, min_detection_confidence=0.5) as pose: for idx, file in enumerate(IMAGE_FILES): image = cv2.imread(file) @@ -250,10 +284,21 @@ with mp_pose.Pose( f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, ' f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})' ) - # Draw pose landmarks on the image. + annotated_image = image.copy() + # Draw segmentation on the image. + # To improve segmentation around boundaries, consider applying a joint + # bilateral filter to "results.segmentation_mask" with "image". + condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1 + bg_image = np.zeros(image.shape, dtype=np.uint8) + bg_image[:] = BG_COLOR + annotated_image = np.where(condition, annotated_image, bg_image) + # Draw pose landmarks on the image. mp_drawing.draw_landmarks( - annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) + annotated_image, + results.pose_landmarks, + mp_pose.POSE_CONNECTIONS, + landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image) # Plot pose world landmarks. mp_drawing.plot_landmarks( @@ -283,7 +328,10 @@ with mp_pose.Pose( image.flags.writeable = True image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) mp_drawing.draw_landmarks( - image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) + image, + results.pose_landmarks, + mp_pose.POSE_CONNECTIONS, + landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()) cv2.imshow('MediaPipe Pose', image) if cv2.waitKey(5) & 0xFF == 27: break @@ -300,6 +348,8 @@ Supported configuration options: * [modelComplexity](#model_complexity) * [smoothLandmarks](#smooth_landmarks) +* [enableSegmentation](#enable_segmentation) +* [smoothSegmentation](#smooth_segmentation) * [minDetectionConfidence](#min_detection_confidence) * [minTrackingConfidence](#min_tracking_confidence) @@ -340,8 +390,20 @@ function onResults(results) { canvasCtx.save(); canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height); + canvasCtx.drawImage(results.segmentationMask, 0, 0, + canvasElement.width, canvasElement.height); + + // Only overwrite existing pixels. + canvasCtx.globalCompositeOperation = 'source-in'; + canvasCtx.fillStyle = '#00FF00'; + canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height); + + // Only overwrite missing pixels. 
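+  // ('destination-atop' keeps the green mask overlay where it exists and
+  // draws the camera frame behind it, filling in the background pixels.)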
+ canvasCtx.globalCompositeOperation = 'destination-atop'; canvasCtx.drawImage( results.image, 0, 0, canvasElement.width, canvasElement.height); + + canvasCtx.globalCompositeOperation = 'source-over'; drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS, {color: '#00FF00', lineWidth: 4}); drawLandmarks(canvasCtx, results.poseLandmarks, @@ -357,6 +419,8 @@ const pose = new Pose({locateFile: (file) => { pose.setOptions({ modelComplexity: 1, smoothLandmarks: true, + enableSegmentation: true, + smoothSegmentation: true, minDetectionConfidence: 0.5, minTrackingConfidence: 0.5 }); diff --git a/mediapipe/calculators/core/gate_calculator.cc b/mediapipe/calculators/core/gate_calculator.cc index dc6c7748f..8fdb9e0a3 100644 --- a/mediapipe/calculators/core/gate_calculator.cc +++ b/mediapipe/calculators/core/gate_calculator.cc @@ -64,8 +64,9 @@ std::string ToString(GateState state) { // ALLOW or DISALLOW can also be specified as an input side packet. The rules // for evaluation remain the same as above. // -// ALLOW/DISALLOW inputs must be specified either using input stream or -// via input side packet but not both. +// ALLOW/DISALLOW inputs must be specified either using input stream or via +// input side packet but not both. If neither is specified, the behavior is then +// determined by the "allow" field in the calculator options. // // Intended to be used with the default input stream handler, which synchronizes // all data input streams with the ALLOW/DISALLOW control input stream. @@ -92,20 +93,22 @@ class GateCalculator : public CalculatorBase { cc->InputSidePackets().HasTag(kDisallowTag); bool input_via_stream = cc->Inputs().HasTag(kAllowTag) || cc->Inputs().HasTag(kDisallowTag); - // Only one of input_side_packet or input_stream may specify ALLOW/DISALLOW - // input. - RET_CHECK(input_via_side_packet ^ input_via_stream); + // Only one of input_side_packet or input_stream may specify + // ALLOW/DISALLOW input. 
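+    // If neither is specified, the "allow" field in GateCalculatorOptions
+    // (handled in Open()) determines whether the data streams pass through.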
if (input_via_side_packet) { + RET_CHECK(!input_via_stream); RET_CHECK(cc->InputSidePackets().HasTag(kAllowTag) ^ cc->InputSidePackets().HasTag(kDisallowTag)); if (cc->InputSidePackets().HasTag(kAllowTag)) { - cc->InputSidePackets().Tag(kAllowTag).Set(); + cc->InputSidePackets().Tag(kAllowTag).Set().Optional(); } else { - cc->InputSidePackets().Tag(kDisallowTag).Set(); + cc->InputSidePackets().Tag(kDisallowTag).Set().Optional(); } - } else { + } + if (input_via_stream) { + RET_CHECK(!input_via_side_packet); RET_CHECK(cc->Inputs().HasTag(kAllowTag) ^ cc->Inputs().HasTag(kDisallowTag)); @@ -139,7 +142,6 @@ class GateCalculator : public CalculatorBase { } absl::Status Open(CalculatorContext* cc) final { - use_side_packet_for_allow_disallow_ = false; if (cc->InputSidePackets().HasTag(kAllowTag)) { use_side_packet_for_allow_disallow_ = true; allow_by_side_packet_decision_ = @@ -158,12 +160,20 @@ class GateCalculator : public CalculatorBase { const auto& options = cc->Options<::mediapipe::GateCalculatorOptions>(); empty_packets_as_allow_ = options.empty_packets_as_allow(); + if (!use_side_packet_for_allow_disallow_ && + !cc->Inputs().HasTag(kAllowTag) && !cc->Inputs().HasTag(kDisallowTag)) { + use_option_for_allow_disallow_ = true; + allow_by_option_decision_ = options.allow(); + } + return absl::OkStatus(); } absl::Status Process(CalculatorContext* cc) final { bool allow = empty_packets_as_allow_; - if (use_side_packet_for_allow_disallow_) { + if (use_option_for_allow_disallow_) { + allow = allow_by_option_decision_; + } else if (use_side_packet_for_allow_disallow_) { allow = allow_by_side_packet_decision_; } else { if (cc->Inputs().HasTag(kAllowTag) && @@ -217,8 +227,10 @@ class GateCalculator : public CalculatorBase { GateState last_gate_state_ = GATE_UNINITIALIZED; int num_data_streams_; bool empty_packets_as_allow_; - bool use_side_packet_for_allow_disallow_; + bool use_side_packet_for_allow_disallow_ = false; bool allow_by_side_packet_decision_; + bool use_option_for_allow_disallow_ = false; + bool allow_by_option_decision_; }; REGISTER_CALCULATOR(GateCalculator); diff --git a/mediapipe/calculators/core/gate_calculator.proto b/mediapipe/calculators/core/gate_calculator.proto index 76bacc74e..32402bf28 100644 --- a/mediapipe/calculators/core/gate_calculator.proto +++ b/mediapipe/calculators/core/gate_calculator.proto @@ -29,4 +29,8 @@ message GateCalculatorOptions { // disallowing the corresponding packets in the data input streams. Setting // this option to true inverts that, allowing the data packets to go through. optional bool empty_packets_as_allow = 1; + + // Whether to allow or disallow the input streams to pass when no + // ALLOW/DISALLOW input or side input is specified. 
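+  // A minimal illustration, mirroring this calculator's unit tests:
+  //   node {
+  //     calculator: "GateCalculator"
+  //     input_stream: "in"
+  //     output_stream: "out"
+  //     options: {
+  //       [mediapipe.GateCalculatorOptions.ext] { allow: true }
+  //     }
+  //   }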
+ optional bool allow = 2 [default = false]; } diff --git a/mediapipe/calculators/core/gate_calculator_test.cc b/mediapipe/calculators/core/gate_calculator_test.cc index 478fc485a..c523bce28 100644 --- a/mediapipe/calculators/core/gate_calculator_test.cc +++ b/mediapipe/calculators/core/gate_calculator_test.cc @@ -113,6 +113,68 @@ TEST_F(GateCalculatorTest, InvalidInputs) { )"))); } +TEST_F(GateCalculatorTest, AllowByALLOWOptionToTrue) { + SetRunner(R"( + calculator: "GateCalculator" + input_stream: "test_input" + output_stream: "test_output" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } + )"); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(2, output.size()); + EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value()); + EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value()); + EXPECT_EQ(true, output[0].Get()); + EXPECT_EQ(false, output[1].Get()); +} + +TEST_F(GateCalculatorTest, DisallowByALLOWOptionSetToFalse) { + SetRunner(R"( + calculator: "GateCalculator" + input_stream: "test_input" + output_stream: "test_output" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: false + } + } + )"); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(0, output.size()); +} + +TEST_F(GateCalculatorTest, DisallowByALLOWOptionNotSet) { + SetRunner(R"( + calculator: "GateCalculator" + input_stream: "test_input" + output_stream: "test_output" + )"); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(0, output.size()); +} + TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) { SetRunner(R"( calculator: "GateCalculator" diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index 507b6f0ff..0bbfadd05 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -661,3 +661,138 @@ cc_test( "//mediapipe/framework/port:parse_text_proto", ], ) + +cc_library( + name = "affine_transformation", + hdrs = ["affine_transformation.h"], + deps = ["@com_google_absl//absl/status:statusor"], +) + +cc_library( + name = "affine_transformation_runner_gl", + srcs = ["affine_transformation_runner_gl.cc"], + hdrs = ["affine_transformation_runner_gl.h"], + deps = [ + ":affine_transformation", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:ret_check", + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gl_simple_shaders", + "//mediapipe/gpu:gpu_buffer", + "//mediapipe/gpu:gpu_origin_cc_proto", + "//mediapipe/gpu:shader_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@eigen_archive//:eigen3", + ], +) + +cc_library( + name = "affine_transformation_runner_opencv", + srcs = ["affine_transformation_runner_opencv.cc"], + hdrs = ["affine_transformation_runner_opencv.h"], + deps = [ + ":affine_transformation", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + 
"//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:ret_check", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/status:statusor", + "@eigen_archive//:eigen3", + ], +) + +mediapipe_proto_library( + name = "warp_affine_calculator_proto", + srcs = ["warp_affine_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + "//mediapipe/gpu:gpu_origin_proto", + ], +) + +cc_library( + name = "warp_affine_calculator", + srcs = ["warp_affine_calculator.cc"], + hdrs = ["warp_affine_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + ":affine_transformation", + ":affine_transformation_runner_opencv", + ":warp_affine_calculator_cc_proto", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/api2:node", + "//mediapipe/framework/api2:port", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ] + select({ + "//mediapipe/gpu:disable_gpu": [], + "//conditions:default": [ + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gpu_buffer", + ":affine_transformation_runner_gl", + ], + }), + alwayslink = 1, +) + +cc_test( + name = "warp_affine_calculator_test", + srcs = ["warp_affine_calculator_test.cc"], + data = [ + "//mediapipe/calculators/tensor:testdata/image_to_tensor/input.jpg", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_border_zero.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_border_zero.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation_border_zero.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_border_zero.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation_border_zero.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/medium_sub_rect_with_rotation_border_zero.png", + "//mediapipe/calculators/tensor:testdata/image_to_tensor/noop_except_range.png", + ], + tags = ["desktop_only_test"], + deps = [ + ":affine_transformation", + ":warp_affine_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_converter", + "//mediapipe/calculators/tensor:image_to_tensor_utils", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/calculators/util:to_image_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + 
"//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgcodecs", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", + "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) diff --git a/mediapipe/calculators/image/affine_transformation.h b/mediapipe/calculators/image/affine_transformation.h new file mode 100644 index 000000000..40793e7a1 --- /dev/null +++ b/mediapipe/calculators/image/affine_transformation.h @@ -0,0 +1,55 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_ +#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_ + +#include + +#include "absl/status/statusor.h" + +namespace mediapipe { + +class AffineTransformation { + public: + // Pixel extrapolation method. + // When converting image to tensor it may happen that tensor needs to read + // pixels outside image boundaries. Border mode helps to specify how such + // pixels will be calculated. + enum class BorderMode { kZero, kReplicate }; + + struct Size { + int width; + int height; + }; + + template + class Runner { + public: + virtual ~Runner() = default; + + // Transforms input into output using @matrix as following: + // output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3], + // matrix[4] * x + matrix[5] * y + matrix[7]) + // where x and y ranges are defined by @output_size. + virtual absl::StatusOr Run(const InputT& input, + const std::array& matrix, + const Size& output_size, + BorderMode border_mode) = 0; + }; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_H_ diff --git a/mediapipe/calculators/image/affine_transformation_runner_gl.cc b/mediapipe/calculators/image/affine_transformation_runner_gl.cc new file mode 100644 index 000000000..c38fc8e07 --- /dev/null +++ b/mediapipe/calculators/image/affine_transformation_runner_gl.cc @@ -0,0 +1,354 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/image/affine_transformation_runner_gl.h" + +#include +#include + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "Eigen/LU" +#include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/calculators/image/affine_transformation.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/gpu/gl_calculator_helper.h" +#include "mediapipe/gpu/gl_simple_shaders.h" +#include "mediapipe/gpu/gpu_buffer.h" +#include "mediapipe/gpu/gpu_origin.pb.h" +#include "mediapipe/gpu/shader_util.h" + +namespace mediapipe { + +namespace { + +using mediapipe::GlCalculatorHelper; +using mediapipe::GlhCreateProgram; +using mediapipe::GlTexture; +using mediapipe::GpuBuffer; +using mediapipe::GpuOrigin; + +bool IsMatrixVerticalFlipNeeded(GpuOrigin::Mode gpu_origin) { + switch (gpu_origin) { + case GpuOrigin::DEFAULT: + case GpuOrigin::CONVENTIONAL: +#ifdef __APPLE__ + return false; +#else + return true; +#endif // __APPLE__ + case GpuOrigin::TOP_LEFT: + return false; + } +} + +#ifdef __APPLE__ +#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 0 +#else +#define GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED 1 +#endif // __APPLE__ + +bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) { + return gl_context.gl_major_version() > 3 || + (gl_context.gl_major_version() == 3 && + gl_context.gl_minor_version() >= 2); +} + +constexpr int kAttribVertex = 0; +constexpr int kAttribTexturePosition = 1; +constexpr int kNumAttributes = 2; + +class GlTextureWarpAffineRunner + : public AffineTransformation::Runner> { + public: + GlTextureWarpAffineRunner(std::shared_ptr gl_helper, + GpuOrigin::Mode gpu_origin) + : gl_helper_(gl_helper), gpu_origin_(gpu_origin) {} + absl::Status Init() { + return gl_helper_->RunInGlContext([this]() -> absl::Status { + const GLint attr_location[kNumAttributes] = { + kAttribVertex, + kAttribTexturePosition, + }; + const GLchar* attr_name[kNumAttributes] = { + "position", + "texture_coordinate", + }; + + constexpr GLchar kVertShader[] = R"( + in vec4 position; + in mediump vec4 texture_coordinate; + out mediump vec2 sample_coordinate; + uniform mat4 transform_matrix; + + void main() { + gl_Position = position; + vec4 tc = transform_matrix * texture_coordinate; + sample_coordinate = tc.xy; + } + )"; + + constexpr GLchar kFragShader[] = R"( + DEFAULT_PRECISION(mediump, float) + in vec2 sample_coordinate; + uniform sampler2D input_texture; + + #ifdef GL_ES + #define fragColor gl_FragColor + #else + out vec4 fragColor; + #endif // defined(GL_ES); + + void main() { + vec4 color = texture2D(input_texture, sample_coordinate); + #ifdef CUSTOM_ZERO_BORDER_MODE + float out_of_bounds = + float(sample_coordinate.x < 0.0 || sample_coordinate.x > 1.0 || + sample_coordinate.y < 0.0 || sample_coordinate.y > 1.0); + color = mix(color, vec4(0.0, 0.0, 0.0, 0.0), out_of_bounds); + #endif // defined(CUSTOM_ZERO_BORDER_MODE) + fragColor = color; + } + )"; + + // Create program and set parameters. 
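+          // Two program variants may be linked below: the default program and,
+          // when GL_CLAMP_TO_BORDER is unavailable on the current context, a
+          // "custom zero border" variant whose fragment shader is compiled with
+          // CUSTOM_ZERO_BORDER_MODE defined so that out-of-range samples are
+          // replaced with transparent black.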
+ auto create_fn = [&](const std::string& vs, + const std::string& fs) -> absl::StatusOr { + GLuint program = 0; + GlhCreateProgram(vs.c_str(), fs.c_str(), kNumAttributes, &attr_name[0], + attr_location, &program); + + RET_CHECK(program) << "Problem initializing warp affine program."; + glUseProgram(program); + glUniform1i(glGetUniformLocation(program, "input_texture"), 1); + GLint matrix_id = glGetUniformLocation(program, "transform_matrix"); + return Program{.id = program, .matrix_id = matrix_id}; + }; + + const std::string vert_src = + absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble, kVertShader); + + const std::string frag_src = absl::StrCat( + mediapipe::kMediaPipeFragmentShaderPreamble, kFragShader); + + ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src)); + + auto create_custom_zero_fn = [&]() -> absl::StatusOr { + std::string custom_zero_border_mode_def = R"( + #define CUSTOM_ZERO_BORDER_MODE + )"; + const std::string frag_custom_zero_src = + absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble, + custom_zero_border_mode_def, kFragShader); + return create_fn(vert_src, frag_custom_zero_src); + }; +#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED + if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) { + ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn()); + } +#else + ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn()); +#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED + + glGenFramebuffers(1, &framebuffer_); + + // vertex storage + glGenBuffers(2, vbo_); + glGenVertexArrays(1, &vao_); + + // vbo 0 + glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]); + glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices), + mediapipe::kBasicSquareVertices, GL_STATIC_DRAW); + + // vbo 1 + glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]); + glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices), + mediapipe::kBasicTextureVertices, GL_STATIC_DRAW); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + + return absl::OkStatus(); + }); + } + + absl::StatusOr> Run( + const GpuBuffer& input, const std::array& matrix, + const AffineTransformation::Size& size, + AffineTransformation::BorderMode border_mode) override { + std::unique_ptr gpu_buffer; + MP_RETURN_IF_ERROR( + gl_helper_->RunInGlContext([this, &input, &matrix, &size, &border_mode, + &gpu_buffer]() -> absl::Status { + auto input_texture = gl_helper_->CreateSourceTexture(input); + auto output_texture = gl_helper_->CreateDestinationTexture( + size.width, size.height, input.format()); + + MP_RETURN_IF_ERROR( + RunInternal(input_texture, matrix, border_mode, &output_texture)); + gpu_buffer = output_texture.GetFrame(); + return absl::OkStatus(); + })); + + return gpu_buffer; + } + + absl::Status RunInternal(const GlTexture& texture, + const std::array& matrix, + AffineTransformation::BorderMode border_mode, + GlTexture* output) { + glDisable(GL_DEPTH_TEST); + glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_); + glViewport(0, 0, output->width(), output->height()); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, output->name()); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + output->name(), 0); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(texture.target(), texture.name()); + + // a) Filtering. + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + // b) Clamping. 
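+      // The wrap mode chosen below realizes the requested border behavior:
+      // kReplicate maps to GL_CLAMP_TO_EDGE, while kZero prefers
+      // GL_CLAMP_TO_BORDER with a transparent border color and falls back to
+      // the custom-zero-border program on contexts where GL_CLAMP_TO_BORDER
+      // is unsupported (OpenGL/OpenGL ES older than 3.2, and always on Apple).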
+ std::optional program = program_; + switch (border_mode) { + case AffineTransformation::BorderMode::kReplicate: { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + break; + } + case AffineTransformation::BorderMode::kZero: { +#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED + if (program_custom_zero_) { + program = program_custom_zero_; + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); + glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, + std::array{0.0f, 0.0f, 0.0f, 0.0f}.data()); + } +#else + RET_CHECK(program_custom_zero_) + << "Program must have been initialized."; + program = program_custom_zero_; +#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED + break; + } + } + glUseProgram(program->id); + + Eigen::Matrix eigen_mat(matrix.data()); + if (IsMatrixVerticalFlipNeeded(gpu_origin_)) { + // @matrix describes affine transformation in terms of TOP LEFT origin, so + // in some cases/on some platforms an extra flipping should be done before + // and after. + const Eigen::Matrix flip_y( + {{1.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, -1.0f, 0.0f, 1.0f}, + {0.0f, 0.0f, 1.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 1.0f}}); + eigen_mat = flip_y * eigen_mat * flip_y; + } + + // If GL context is ES2, then GL_FALSE must be used for 'transpose' + // GLboolean in glUniformMatrix4fv, or else INVALID_VALUE error is reported. + // Hence, transposing the matrix and always passing transposed. + eigen_mat.transposeInPlace(); + glUniformMatrix4fv(program->matrix_id, 1, GL_FALSE, eigen_mat.data()); + + // vao + glBindVertexArray(vao_); + + // vbo 0 + glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]); + glEnableVertexAttribArray(kAttribVertex); + glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr); + + // vbo 1 + glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]); + glEnableVertexAttribArray(kAttribTexturePosition); + glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr); + + // draw + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + // Resetting to MediaPipe texture param defaults. + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + glDisableVertexAttribArray(kAttribVertex); + glDisableVertexAttribArray(kAttribTexturePosition); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(0); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, 0); + + return absl::OkStatus(); + } + + ~GlTextureWarpAffineRunner() override { + gl_helper_->RunInGlContext([this]() { + // Release OpenGL resources. 
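+      // The deletions below are wrapped in RunInGlContext, so the GL objects
+      // are released with the helper's GL context made current.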
+ if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_); + if (program_.id != 0) glDeleteProgram(program_.id); + if (program_custom_zero_ && program_custom_zero_->id != 0) { + glDeleteProgram(program_custom_zero_->id); + } + if (vao_ != 0) glDeleteVertexArrays(1, &vao_); + glDeleteBuffers(2, vbo_); + }); + } + + private: + struct Program { + GLuint id; + GLint matrix_id; + }; + std::shared_ptr gl_helper_; + GpuOrigin::Mode gpu_origin_; + GLuint vao_ = 0; + GLuint vbo_[2] = {0, 0}; + Program program_; + std::optional program_custom_zero_; + GLuint framebuffer_ = 0; +}; + +#undef GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED + +} // namespace + +absl::StatusOr>>> +CreateAffineTransformationGlRunner( + std::shared_ptr gl_helper, GpuOrigin::Mode gpu_origin) { + auto runner = + absl::make_unique(gl_helper, gpu_origin); + MP_RETURN_IF_ERROR(runner->Init()); + return runner; +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/image/affine_transformation_runner_gl.h b/mediapipe/calculators/image/affine_transformation_runner_gl.h new file mode 100644 index 000000000..677e0720d --- /dev/null +++ b/mediapipe/calculators/image/affine_transformation_runner_gl.h @@ -0,0 +1,36 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_ +#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_ + +#include + +#include "absl/status/statusor.h" +#include "mediapipe/calculators/image/affine_transformation.h" +#include "mediapipe/gpu/gl_calculator_helper.h" +#include "mediapipe/gpu/gpu_buffer.h" +#include "mediapipe/gpu/gpu_origin.pb.h" + +namespace mediapipe { + +absl::StatusOr>>> +CreateAffineTransformationGlRunner( + std::shared_ptr gl_helper, + mediapipe::GpuOrigin::Mode gpu_origin); + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_GL_H_ diff --git a/mediapipe/calculators/image/affine_transformation_runner_opencv.cc b/mediapipe/calculators/image/affine_transformation_runner_opencv.cc new file mode 100644 index 000000000..46026a987 --- /dev/null +++ b/mediapipe/calculators/image/affine_transformation_runner_opencv.cc @@ -0,0 +1,160 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
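As a point of reference, the sketch below shows one way a calculator could construct and hold the GL runner declared above; it mirrors the GPU holder in warp_affine_calculator.cc further down. The function name is illustrative only, the Runner template arguments are reconstructed from usage, and the snippet assumes the calculator's contract already enables GPU support via GlCalculatorHelper::UpdateContract.

    #include <memory>

    #include "mediapipe/calculators/image/affine_transformation.h"
    #include "mediapipe/calculators/image/affine_transformation_runner_gl.h"
    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/gpu/gl_calculator_helper.h"
    #include "mediapipe/gpu/gpu_buffer.h"
    #include "mediapipe/gpu/gpu_origin.pb.h"

    namespace mediapipe {

    // Illustrative helper: creates the GL-backed affine runner during Open().
    absl::StatusOr<std::unique_ptr<
        AffineTransformation::Runner<GpuBuffer, std::unique_ptr<GpuBuffer>>>>
    MakeGlWarpRunnerSketch(CalculatorContext* cc) {
      auto gl_helper = std::make_shared<GlCalculatorHelper>();
      MP_RETURN_IF_ERROR(gl_helper->Open(cc));
      // TOP_LEFT avoids the extra vertical flip applied for the CONVENTIONAL
      // (bottom-left) origin.
      return CreateAffineTransformationGlRunner(gl_helper, GpuOrigin::TOP_LEFT);
    }

    }  // namespace mediapipe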
+ +#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h" + +#include + +#include "absl/memory/memory.h" +#include "absl/status/statusor.h" +#include "mediapipe/calculators/image/affine_transformation.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/ret_check.h" + +namespace mediapipe { + +namespace { + +cv::BorderTypes GetBorderModeForOpenCv( + AffineTransformation::BorderMode border_mode) { + switch (border_mode) { + case AffineTransformation::BorderMode::kZero: + return cv::BORDER_CONSTANT; + case AffineTransformation::BorderMode::kReplicate: + return cv::BORDER_REPLICATE; + } +} + +class OpenCvRunner + : public AffineTransformation::Runner { + public: + absl::StatusOr Run( + const ImageFrame& input, const std::array& matrix, + const AffineTransformation::Size& size, + AffineTransformation::BorderMode border_mode) override { + // OpenCV warpAffine works in absolute coordinates, so the transfom (which + // accepts and produces relative coordinates) should be adjusted to first + // normalize coordinates and then scale them. + // clang-format off + cv::Matx44f normalize_dst_coordinate({ + 1.0f / size.width, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f / size.height, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f}); + cv::Matx44f scale_src_coordinate({ + 1.0f * input.Width(), 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f * input.Height(), 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f}); + // clang-format on + cv::Matx44f adjust_dst_coordinate; + cv::Matx44f adjust_src_coordinate; + // TODO: update to always use accurate implementation. + constexpr bool kOpenCvCompatibility = true; + if (kOpenCvCompatibility) { + adjust_dst_coordinate = normalize_dst_coordinate; + adjust_src_coordinate = scale_src_coordinate; + } else { + // To do an accurate affine image transformation and make "on-cpu" and + // "on-gpu" calculations aligned - extra offset is required to select + // correct pixels. + // + // Each destination pixel corresponds to some pixels region from source + // image.(In case of downscaling there can be more than one pixel.) The + // offset for x and y is calculated in the way, so pixel in the middle of + // the region is selected. + // + // For simplicity sake, let's consider downscaling from 100x50 to 10x10 + // without a rotation: + // 1. Each destination pixel corresponds to 10x5 region + // X range: [0, .. , 9] + // Y range: [0, .. , 4] + // 2. Considering we have __discrete__ pixels, the center of the region is + // between (4, 2) and (5, 2) pixels, let's assume it's a "pixel" + // (4.5, 2). + // 3. When using the above as an offset for every pixel select while + // downscaling, resulting pixels are: + // (4.5, 2), (14.5, 2), .. , (94.5, 2) + // (4.5, 7), (14.5, 7), .. , (94.5, 7) + // .. + // (4.5, 47), (14.5, 47), .., (94.5, 47) + // instead of: + // (0, 0), (10, 0), .. , (90, 0) + // (0, 5), (10, 7), .. , (90, 5) + // .. + // (0, 45), (10, 45), .., (90, 45) + // The latter looks shifted. + // + // Offsets are needed, so that __discrete__ pixel at (0, 0) corresponds to + // the same pixel as would __non discrete__ pixel at (0.5, 0.5). Hence, + // transformation matrix should shift coordinates by (0.5, 0.5) as the + // very first step. 
+ // + // Due to the above shift, transformed coordinates would be valid for + // float coordinates where pixel (0, 0) spans [0.0, 1.0) x [0.0, 1.0). + // T0 make it valid for __discrete__ pixels, transformation matrix should + // shift coordinate by (-0.5f, -0.5f) as the very last step. (E.g. if we + // get (0.5f, 0.5f), then it's (0, 0) __discrete__ pixel.) + // clang-format off + cv::Matx44f shift_dst({1.0f, 0.0f, 0.0f, 0.5f, + 0.0f, 1.0f, 0.0f, 0.5f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f}); + cv::Matx44f shift_src({1.0f, 0.0f, 0.0f, -0.5f, + 0.0f, 1.0f, 0.0f, -0.5f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f}); + // clang-format on + adjust_dst_coordinate = normalize_dst_coordinate * shift_dst; + adjust_src_coordinate = shift_src * scale_src_coordinate; + } + + cv::Matx44f transform(matrix.data()); + cv::Matx44f transform_absolute = + adjust_src_coordinate * transform * adjust_dst_coordinate; + + cv::Mat in_mat = formats::MatView(&input); + + cv::Mat cv_affine_transform(2, 3, CV_32F); + cv_affine_transform.at(0, 0) = transform_absolute.val[0]; + cv_affine_transform.at(0, 1) = transform_absolute.val[1]; + cv_affine_transform.at(0, 2) = transform_absolute.val[3]; + cv_affine_transform.at(1, 0) = transform_absolute.val[4]; + cv_affine_transform.at(1, 1) = transform_absolute.val[5]; + cv_affine_transform.at(1, 2) = transform_absolute.val[7]; + + ImageFrame out_image(input.Format(), size.width, size.height); + cv::Mat out_mat = formats::MatView(&out_image); + + cv::warpAffine(in_mat, out_mat, cv_affine_transform, + cv::Size(out_mat.cols, out_mat.rows), + /*flags=*/cv::INTER_LINEAR | cv::WARP_INVERSE_MAP, + GetBorderModeForOpenCv(border_mode)); + + return out_image; + } +}; + +} // namespace + +absl::StatusOr< + std::unique_ptr>> +CreateAffineTransformationOpenCvRunner() { + return absl::make_unique(); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/image/affine_transformation_runner_opencv.h b/mediapipe/calculators/image/affine_transformation_runner_opencv.h new file mode 100644 index 000000000..200281c95 --- /dev/null +++ b/mediapipe/calculators/image/affine_transformation_runner_opencv.h @@ -0,0 +1,32 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
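For the CPU path implemented above, here is a minimal usage sketch (hypothetical helper name; Runner<ImageFrame, ImageFrame> template arguments reconstructed from the factory's usage) that resamples a full frame into a smaller output using an identity transform in relative coordinates:

    #include <array>
    #include <utility>

    #include "absl/status/statusor.h"
    #include "mediapipe/calculators/image/affine_transformation.h"
    #include "mediapipe/calculators/image/affine_transformation_runner_opencv.h"
    #include "mediapipe/framework/formats/image_frame.h"

    namespace mediapipe {

    // Illustrative only: warps a 64x64 SRGB frame into a 32x32 output.
    absl::StatusOr<ImageFrame> IdentityWarpSketch() {
      auto runner_or = CreateAffineTransformationOpenCvRunner();
      if (!runner_or.ok()) return runner_or.status();
      auto runner = std::move(runner_or).value();

      ImageFrame input(ImageFormat::SRGB, 64, 64);
      // Row-major 4x4 matrix in relative coordinates; the runner only reads
      // entries 0, 1, 3 (x row) and 4, 5, 7 (y row).
      const std::array<float, 16> matrix = {1, 0, 0, 0,   //
                                            0, 1, 0, 0,   //
                                            0, 0, 1, 0,   //
                                            0, 0, 0, 1};
      AffineTransformation::Size out_size;
      out_size.width = 32;
      out_size.height = 32;
      return runner->Run(input, matrix, out_size,
                         AffineTransformation::BorderMode::kReplicate);
    }

    }  // namespace mediapipe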
+ +#ifndef MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_ +#define MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_ + +#include + +#include "absl/status/statusor.h" +#include "mediapipe/calculators/image/affine_transformation.h" +#include "mediapipe/framework/formats/image_frame.h" + +namespace mediapipe { + +absl::StatusOr< + std::unique_ptr>> +CreateAffineTransformationOpenCvRunner(); + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_IMAGE_AFFINE_TRANSFORMATION_RUNNER_OPENCV_H_ diff --git a/mediapipe/calculators/image/scale_image_calculator.cc b/mediapipe/calculators/image/scale_image_calculator.cc index 575268da5..0669f5322 100644 --- a/mediapipe/calculators/image/scale_image_calculator.cc +++ b/mediapipe/calculators/image/scale_image_calculator.cc @@ -262,6 +262,7 @@ absl::Status ScaleImageCalculator::InitializeFrameInfo(CalculatorContext* cc) { scale_image::FindOutputDimensions(crop_width_, crop_height_, // options_.target_width(), // options_.target_height(), // + options_.target_max_area(), // options_.preserve_aspect_ratio(), // options_.scale_to_multiple_of(), // &output_width_, &output_height_)); diff --git a/mediapipe/calculators/image/scale_image_calculator.proto b/mediapipe/calculators/image/scale_image_calculator.proto index e51ccafaa..2b7572d56 100644 --- a/mediapipe/calculators/image/scale_image_calculator.proto +++ b/mediapipe/calculators/image/scale_image_calculator.proto @@ -28,6 +28,11 @@ message ScaleImageCalculatorOptions { optional int32 target_width = 1; optional int32 target_height = 2; + // If set, then automatically calculates a target_width and target_height that + // has an area below the target max area. Aspect ratio preservation cannot be + // disabled. + optional int32 target_max_area = 15; + // If true, the image is scaled up or down proportionally so that it // fits inside the box represented by target_width and target_height. // Otherwise it is scaled to fit target_width and target_height diff --git a/mediapipe/calculators/image/scale_image_utils.cc b/mediapipe/calculators/image/scale_image_utils.cc index 738e83da0..490d0336a 100644 --- a/mediapipe/calculators/image/scale_image_utils.cc +++ b/mediapipe/calculators/image/scale_image_utils.cc @@ -92,12 +92,21 @@ absl::Status FindOutputDimensions(int input_width, // int input_height, // int target_width, // int target_height, // + int target_max_area, // bool preserve_aspect_ratio, // int scale_to_multiple_of, // int* output_width, int* output_height) { CHECK(output_width); CHECK(output_height); + if (target_max_area > 0 && input_width * input_height > target_max_area) { + preserve_aspect_ratio = true; + target_height = static_cast(sqrt(static_cast(target_max_area) / + (static_cast(input_width) / + static_cast(input_height)))); + target_width = -1; // Resize width to preserve aspect ratio. 
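+    // Worked example: a 200x100 input with target_max_area = 9000 has aspect
+    // ratio 2.0, so target_height becomes static_cast<int>(sqrt(9000 / 2.0)),
+    // i.e. 67, and the width is then derived from the aspect ratio by the
+    // preserve-aspect-ratio logic below (with rounding down to even values),
+    // keeping output_width * output_height at or below 9000.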
+ } + if (preserve_aspect_ratio) { RET_CHECK(scale_to_multiple_of == 2) << "FindOutputDimensions always outputs width and height that are " @@ -164,5 +173,17 @@ absl::Status FindOutputDimensions(int input_width, // << "Unable to set output dimensions based on target dimensions."; } +absl::Status FindOutputDimensions(int input_width, // + int input_height, // + int target_width, // + int target_height, // + bool preserve_aspect_ratio, // + int scale_to_multiple_of, // + int* output_width, int* output_height) { + return FindOutputDimensions( + input_width, input_height, target_width, target_height, -1, + preserve_aspect_ratio, scale_to_multiple_of, output_width, output_height); +} + } // namespace scale_image } // namespace mediapipe diff --git a/mediapipe/calculators/image/scale_image_utils.h b/mediapipe/calculators/image/scale_image_utils.h index c2c0b8f7c..e7fccd8dc 100644 --- a/mediapipe/calculators/image/scale_image_utils.h +++ b/mediapipe/calculators/image/scale_image_utils.h @@ -34,15 +34,25 @@ absl::Status FindCropDimensions(int input_width, int input_height, // int* crop_width, int* crop_height, // int* col_start, int* row_start); -// Given an input width and height, a target width and height, whether to -// preserve the aspect ratio, and whether to round-down to the multiple of a -// given number nearest to the targets, determine the output width and height. -// If target_width or target_height is non-positive, then they will be set to -// the input_width and input_height respectively. If scale_to_multiple_of is -// less than 1, it will be treated like 1. The output_width and -// output_height will be reduced as necessary to preserve_aspect_ratio if the -// option is specified. If preserving the aspect ratio is desired, you must set -// scale_to_multiple_of to 2. +// Given an input width and height, a target width and height or max area, +// whether to preserve the aspect ratio, and whether to round-down to the +// multiple of a given number nearest to the targets, determine the output width +// and height. If target_width or target_height is non-positive, then they will +// be set to the input_width and input_height respectively. If target_area is +// non-positive, then it will be ignored. If scale_to_multiple_of is less than +// 1, it will be treated like 1. The output_width and output_height will be +// reduced as necessary to preserve_aspect_ratio if the option is specified. If +// preserving the aspect ratio is desired, you must set scale_to_multiple_of +// to 2. +absl::Status FindOutputDimensions(int input_width, int input_height, // + int target_width, + int target_height, // + int target_max_area, // + bool preserve_aspect_ratio, // + int scale_to_multiple_of, // + int* output_width, int* output_height); + +// Backwards compatible helper. absl::Status FindOutputDimensions(int input_width, int input_height, // int target_width, int target_height, // diff --git a/mediapipe/calculators/image/scale_image_utils_test.cc b/mediapipe/calculators/image/scale_image_utils_test.cc index 14a58e762..bda1fa4d6 100644 --- a/mediapipe/calculators/image/scale_image_utils_test.cc +++ b/mediapipe/calculators/image/scale_image_utils_test.cc @@ -79,49 +79,49 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsPreserveRatio) { int output_width; int output_height; // Not scale. 
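+  // In the updated calls below the fifth argument is target_max_area; a
+  // non-positive value (-1) disables the max-area constraint, so these cases
+  // keep their original expectations.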
- MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(200, output_width); EXPECT_EQ(100, output_height); // Not scale with odd input size. - MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, false, 1, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, -1, -1, false, 1, + &output_width, &output_height)); EXPECT_EQ(201, output_width); EXPECT_EQ(101, output_height); // Scale down by 1/2. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(100, output_width); EXPECT_EQ(50, output_height); // Scale up, doubling dimensions. - MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(400, output_width); EXPECT_EQ(200, output_height); // Fits a 2:1 image into a 150 x 150 box. Output dimensions are always // visible by 2. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 150, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(150, output_width); EXPECT_EQ(74, output_height); // Fits a 2:1 image into a 400 x 50 box. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 50, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(100, output_width); EXPECT_EQ(50, output_height); // Scale to multiple number with odd targe size. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(100, output_width); EXPECT_EQ(50, output_height); // Scale to multiple number with odd targe size. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 101, -1, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(100, output_width); EXPECT_EQ(50, output_height); // Scale to odd size. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, false, 1, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 151, 101, -1, false, 1, + &output_width, &output_height)); EXPECT_EQ(151, output_width); EXPECT_EQ(101, output_height); } @@ -131,18 +131,18 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsNoAspectRatio) { int output_width; int output_height; // Scale width only. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 2, + &output_width, &output_height)); EXPECT_EQ(100, output_width); EXPECT_EQ(100, output_height); // Scale height only. - MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, false, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, 200, -1, false, 2, + &output_width, &output_height)); EXPECT_EQ(200, output_width); EXPECT_EQ(200, output_height); // Scale both dimensions. 
- MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 2, + &output_width, &output_height)); EXPECT_EQ(150, output_width); EXPECT_EQ(200, output_height); } @@ -152,41 +152,78 @@ TEST(ScaleImageUtilsTest, FindOutputDimensionsDownScaleToMultipleOf) { int output_width; int output_height; // Set no targets, downscale to a multiple of 8. - MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, false, 8, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(100, 100, -1, -1, -1, false, 8, + &output_width, &output_height)); EXPECT_EQ(96, output_width); EXPECT_EQ(96, output_height); // Set width target, downscale to a multiple of 8. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, false, 8, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 100, -1, -1, false, 8, + &output_width, &output_height)); EXPECT_EQ(96, output_width); EXPECT_EQ(96, output_height); // Set height target, downscale to a multiple of 8. - MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, false, 8, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(201, 101, -1, 201, -1, false, 8, + &output_width, &output_height)); EXPECT_EQ(200, output_width); EXPECT_EQ(200, output_height); // Set both targets, downscale to a multiple of 8. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, false, 8, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 150, 200, -1, false, 8, + &output_width, &output_height)); EXPECT_EQ(144, output_width); EXPECT_EQ(200, output_height); // Doesn't throw error if keep aspect is true and downscale multiple is 2. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, true, 2, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 400, 200, -1, true, 2, + &output_width, &output_height)); EXPECT_EQ(400, output_width); EXPECT_EQ(200, output_height); // Throws error if keep aspect is true, but downscale multiple is not 2. - ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, true, 4, &output_width, - &output_height), + ASSERT_THAT(FindOutputDimensions(200, 100, 400, 200, -1, true, 4, + &output_width, &output_height), testing::Not(testing::status::IsOk())); // Downscaling to multiple ignored if multiple is less than 2. - MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, false, 1, &output_width, - &output_height)); + MP_ASSERT_OK(FindOutputDimensions(200, 100, 401, 201, -1, false, 1, + &output_width, &output_height)); EXPECT_EQ(401, output_width); EXPECT_EQ(201, output_height); } +// Tests scaling without keeping the aspect ratio fixed. +TEST(ScaleImageUtilsTest, FindOutputDimensionsMaxArea) { + int output_width; + int output_height; + // Smaller area. + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 9000, false, 2, + &output_width, &output_height)); + EXPECT_NEAR( + 200 / 100, + static_cast(output_width) / static_cast(output_height), + 0.1f); + EXPECT_LE(output_width * output_height, 9000); + // Close to original area. + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 19999, false, 2, + &output_width, &output_height)); + EXPECT_NEAR( + 200.0 / 100.0, + static_cast(output_width) / static_cast(output_height), + 0.1f); + EXPECT_LE(output_width * output_height, 19999); + // Don't scale with larger area. 
+ MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20001, false, 2, + &output_width, &output_height)); + EXPECT_EQ(200, output_width); + EXPECT_EQ(100, output_height); + // Don't scale with equal area. + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, 20000, false, 2, + &output_width, &output_height)); + EXPECT_EQ(200, output_width); + EXPECT_EQ(100, output_height); + // Don't scale at all. + MP_ASSERT_OK(FindOutputDimensions(200, 100, -1, -1, -1, false, 2, + &output_width, &output_height)); + EXPECT_EQ(200, output_width); + EXPECT_EQ(100, output_height); +} + } // namespace } // namespace scale_image } // namespace mediapipe diff --git a/mediapipe/calculators/image/warp_affine_calculator.cc b/mediapipe/calculators/image/warp_affine_calculator.cc new file mode 100644 index 000000000..e3d017a35 --- /dev/null +++ b/mediapipe/calculators/image/warp_affine_calculator.cc @@ -0,0 +1,211 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/image/warp_affine_calculator.h" + +#include +#include +#include + +#include "mediapipe/calculators/image/affine_transformation.h" +#if !MEDIAPIPE_DISABLE_GPU +#include "mediapipe/calculators/image/affine_transformation_runner_gl.h" +#endif // !MEDIAPIPE_DISABLE_GPU +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/calculators/image/affine_transformation_runner_opencv.h" +#include "mediapipe/calculators/image/warp_affine_calculator.pb.h" +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/port/ret_check.h" +#if !MEDIAPIPE_DISABLE_GPU +#include "mediapipe/gpu/gl_calculator_helper.h" +#include "mediapipe/gpu/gpu_buffer.h" +#endif // !MEDIAPIPE_DISABLE_GPU + +namespace mediapipe { + +namespace { + +AffineTransformation::BorderMode GetBorderMode( + mediapipe::WarpAffineCalculatorOptions::BorderMode border_mode) { + switch (border_mode) { + case mediapipe::WarpAffineCalculatorOptions::BORDER_ZERO: + return AffineTransformation::BorderMode::kZero; + case mediapipe::WarpAffineCalculatorOptions::BORDER_UNSPECIFIED: + case mediapipe::WarpAffineCalculatorOptions::BORDER_REPLICATE: + return AffineTransformation::BorderMode::kReplicate; + } +} + +template +class WarpAffineRunnerHolder {}; + +template <> +class WarpAffineRunnerHolder { + public: + using RunnerType = AffineTransformation::Runner; + absl::Status Open(CalculatorContext* cc) { return absl::OkStatus(); } + absl::StatusOr GetRunner() { + if (!runner_) { + ASSIGN_OR_RETURN(runner_, CreateAffineTransformationOpenCvRunner()); + } + return runner_.get(); + } + + private: + std::unique_ptr runner_; +}; + +#if !MEDIAPIPE_DISABLE_GPU +template <> +class WarpAffineRunnerHolder { + public: + using RunnerType = + AffineTransformation::Runner>; + absl::Status Open(CalculatorContext* cc) { + gpu_origin_ = + 
cc->Options().gpu_origin(); + gl_helper_ = std::make_shared(); + return gl_helper_->Open(cc); + } + absl::StatusOr GetRunner() { + if (!runner_) { + ASSIGN_OR_RETURN( + runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_)); + } + return runner_.get(); + } + + private: + mediapipe::GpuOrigin::Mode gpu_origin_; + std::shared_ptr gl_helper_; + std::unique_ptr runner_; +}; +#endif // !MEDIAPIPE_DISABLE_GPU + +template <> +class WarpAffineRunnerHolder { + public: + absl::Status Open(CalculatorContext* cc) { return runner_.Open(cc); } + absl::StatusOr< + AffineTransformation::Runner*> + GetRunner() { + return &runner_; + } + + private: + class Runner : public AffineTransformation::Runner { + public: + absl::Status Open(CalculatorContext* cc) { + MP_RETURN_IF_ERROR(cpu_holder_.Open(cc)); +#if !MEDIAPIPE_DISABLE_GPU + MP_RETURN_IF_ERROR(gpu_holder_.Open(cc)); +#endif // !MEDIAPIPE_DISABLE_GPU + return absl::OkStatus(); + } + absl::StatusOr Run( + const mediapipe::Image& input, const std::array& matrix, + const AffineTransformation::Size& size, + AffineTransformation::BorderMode border_mode) override { + if (input.UsesGpu()) { +#if !MEDIAPIPE_DISABLE_GPU + ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner()); + ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix, + size, border_mode)); + return mediapipe::Image(*result); +#else + return absl::UnavailableError("GPU support is disabled"); +#endif // !MEDIAPIPE_DISABLE_GPU + } + ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner()); + const auto& frame_ptr = input.GetImageFrameSharedPtr(); + // Wrap image into image frame. + const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(), + frame_ptr->Height(), frame_ptr->WidthStep(), + const_cast(frame_ptr->PixelData()), + [](uint8* data) {}); + ASSIGN_OR_RETURN(auto result, + runner->Run(image_frame, matrix, size, border_mode)); + return mediapipe::Image(std::make_shared(std::move(result))); + } + + private: + WarpAffineRunnerHolder cpu_holder_; +#if !MEDIAPIPE_DISABLE_GPU + WarpAffineRunnerHolder gpu_holder_; +#endif // !MEDIAPIPE_DISABLE_GPU + }; + + Runner runner_; +}; + +template +class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl { + public: +#if !MEDIAPIPE_DISABLE_GPU + static absl::Status UpdateContract(CalculatorContract* cc) { + if constexpr (std::is_same_v || + std::is_same_v) { + MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); + } + return absl::OkStatus(); + } +#endif // !MEDIAPIPE_DISABLE_GPU + + absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); } + + absl::Status Process(CalculatorContext* cc) override { + if (InterfaceT::kInImage(cc).IsEmpty() || + InterfaceT::kMatrix(cc).IsEmpty() || + InterfaceT::kOutputSize(cc).IsEmpty()) { + return absl::OkStatus(); + } + const std::array& transform = *InterfaceT::kMatrix(cc); + auto [out_width, out_height] = *InterfaceT::kOutputSize(cc); + AffineTransformation::Size output_size; + output_size.width = out_width; + output_size.height = out_height; + ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner()); + ASSIGN_OR_RETURN( + auto result, + runner->Run( + *InterfaceT::kInImage(cc), transform, output_size, + GetBorderMode(cc->Options() + .border_mode()))); + InterfaceT::kOutImage(cc).Send(std::move(result)); + + return absl::OkStatus(); + } + + private: + WarpAffineRunnerHolder + holder_; +}; + +} // namespace + +MEDIAPIPE_NODE_IMPLEMENTATION( + WarpAffineCalculatorImpl); +#if !MEDIAPIPE_DISABLE_GPU +MEDIAPIPE_NODE_IMPLEMENTATION( + 
WarpAffineCalculatorImpl); +#endif // !MEDIAPIPE_DISABLE_GPU +MEDIAPIPE_NODE_IMPLEMENTATION(WarpAffineCalculatorImpl); + +} // namespace mediapipe diff --git a/mediapipe/calculators/image/warp_affine_calculator.h b/mediapipe/calculators/image/warp_affine_calculator.h new file mode 100644 index 000000000..4a1b07030 --- /dev/null +++ b/mediapipe/calculators/image/warp_affine_calculator.h @@ -0,0 +1,94 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_ + +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" + +#if !MEDIAPIPE_DISABLE_GPU +#include "mediapipe/gpu/gpu_buffer.h" +#endif // !MEDIAPIPE_DISABLE_GPU + +namespace mediapipe { + +// Runs affine transformation. +// +// Input: +// IMAGE - Image/ImageFrame/GpuBuffer +// +// MATRIX - std::array +// Used as following: +// output(x, y) = input(matrix[0] * x + matrix[1] * y + matrix[3], +// matrix[4] * x + matrix[5] * y + matrix[7]) +// where x and y ranges are defined by @OUTPUT_SIZE. +// +// OUTPUT_SIZE - std::pair +// Size of the output image. +// +// Output: +// IMAGE - Image/ImageFrame/GpuBuffer +// +// Note: +// - Output image type and format are the same as the input one. 
+// +// Usage example: +// node { +// calculator: "WarpAffineCalculator(Cpu|Gpu)" +// input_stream: "IMAGE:image" +// input_stream: "MATRIX:matrix" +// input_stream: "OUTPUT_SIZE:size" +// output_stream: "IMAGE:transformed_image" +// options: { +// [mediapipe.WarpAffineCalculatorOptions.ext] { +// border_mode: BORDER_ZERO +// } +// } +// } +template +class WarpAffineCalculatorIntf : public mediapipe::api2::NodeIntf { + public: + static constexpr mediapipe::api2::Input kInImage{"IMAGE"}; + static constexpr mediapipe::api2::Input> kMatrix{ + "MATRIX"}; + static constexpr mediapipe::api2::Input> kOutputSize{ + "OUTPUT_SIZE"}; + static constexpr mediapipe::api2::Output kOutImage{"IMAGE"}; +}; + +class WarpAffineCalculatorCpu : public WarpAffineCalculatorIntf { + public: + MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorCpu, kInImage, kMatrix, + kOutputSize, kOutImage); +}; +#if !MEDIAPIPE_DISABLE_GPU +class WarpAffineCalculatorGpu + : public WarpAffineCalculatorIntf { + public: + MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculatorGpu, kInImage, kMatrix, + kOutputSize, kOutImage); +}; +#endif // !MEDIAPIPE_DISABLE_GPU +class WarpAffineCalculator : public WarpAffineCalculatorIntf { + public: + MEDIAPIPE_NODE_INTERFACE(WarpAffineCalculator, kInImage, kMatrix, kOutputSize, + kOutImage); +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_IMAGE_WARP_AFFINE_CALCULATOR_H_ diff --git a/mediapipe/calculators/image/warp_affine_calculator.proto b/mediapipe/calculators/image/warp_affine_calculator.proto new file mode 100644 index 000000000..20e6c1b07 --- /dev/null +++ b/mediapipe/calculators/image/warp_affine_calculator.proto @@ -0,0 +1,46 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/gpu/gpu_origin.proto"; + +message WarpAffineCalculatorOptions { + extend CalculatorOptions { + optional WarpAffineCalculatorOptions ext = 373693895; + } + + // Pixel extrapolation methods. See @border_mode. + enum BorderMode { + BORDER_UNSPECIFIED = 0; + BORDER_ZERO = 1; + BORDER_REPLICATE = 2; + } + + // Pixel extrapolation method. + // When converting image to tensor it may happen that tensor needs to read + // pixels outside image boundaries. Border mode helps to specify how such + // pixels will be calculated. + // + // BORDER_REPLICATE is used by default. + optional BorderMode border_mode = 1; + + // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs + // to be flipped vertically as tensors are expected to start at top. + // (DEFAULT or unset interpreted as CONVENTIONAL.) 
+ optional GpuOrigin.Mode gpu_origin = 2; +} diff --git a/mediapipe/calculators/image/warp_affine_calculator_test.cc b/mediapipe/calculators/image/warp_affine_calculator_test.cc new file mode 100644 index 000000000..959912cc9 --- /dev/null +++ b/mediapipe/calculators/image/warp_affine_calculator_test.cc @@ -0,0 +1,615 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "absl/flags/flag.h" +#include "absl/memory/memory.h" +#include "absl/strings/substitute.h" +#include "mediapipe/calculators/image/affine_transformation.h" +#include "mediapipe/calculators/tensor/image_to_tensor_converter.h" +#include "mediapipe/calculators/tensor/image_to_tensor_utils.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgcodecs_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" + +namespace mediapipe { +namespace { + +cv::Mat GetRgb(absl::string_view path) { + cv::Mat bgr = cv::imread(file::JoinPath("./", path)); + cv::Mat rgb(bgr.rows, bgr.cols, CV_8UC3); + int from_to[] = {0, 2, 1, 1, 2, 0}; + cv::mixChannels(&bgr, 1, &rgb, 1, from_to, 3); + return rgb; +} + +cv::Mat GetRgba(absl::string_view path) { + cv::Mat bgr = cv::imread(file::JoinPath("./", path)); + cv::Mat rgba(bgr.rows, bgr.cols, CV_8UC4, cv::Scalar(0, 0, 0, 0)); + int from_to[] = {0, 2, 1, 1, 2, 0}; + cv::mixChannels(&bgr, 1, &bgr, 1, from_to, 3); + return bgr; +} + +// Test template. +// No processing/assertions should be done after the function is invoked. +void RunTest(const std::string& graph_text, const std::string& tag, + const cv::Mat& input, cv::Mat expected_result, + float similarity_threshold, std::array matrix, + int out_width, int out_height, + absl::optional border_mode) { + std::string border_mode_str; + if (border_mode) { + switch (*border_mode) { + case AffineTransformation::BorderMode::kReplicate: + border_mode_str = "border_mode: BORDER_REPLICATE"; + break; + case AffineTransformation::BorderMode::kZero: + border_mode_str = "border_mode: BORDER_ZERO"; + break; + } + } + auto graph_config = mediapipe::ParseTextProtoOrDie( + absl::Substitute(graph_text, /*$0=*/border_mode_str)); + + std::vector output_packets; + tool::AddVectorSink("output_image", &graph_config, &output_packets); + + // Run the graph. 
+ CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + + ImageFrame input_image( + input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB, + input.cols, input.rows, input.step, input.data, [](uint8*) {}); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", + MakePacket(std::move(input_image)).At(Timestamp(0)))); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "matrix", + MakePacket>(std::move(matrix)).At(Timestamp(0)))); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "output_size", MakePacket>( + std::pair(out_width, out_height)) + .At(Timestamp(0)))); + + MP_ASSERT_OK(graph.WaitUntilIdle()); + ASSERT_THAT(output_packets, testing::SizeIs(1)); + + // Get and process results. + const ImageFrame& out_frame = output_packets[0].Get(); + cv::Mat result = formats::MatView(&out_frame); + double similarity = + 1.0 - cv::norm(result, expected_result, cv::NORM_RELATIVE | cv::NORM_L2); + EXPECT_GE(similarity, similarity_threshold); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). + MP_ASSERT_OK(graph.CloseInputStream("input_image")); + MP_ASSERT_OK(graph.CloseInputStream("matrix")); + MP_ASSERT_OK(graph.CloseInputStream("output_size")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +enum class InputType { kImageFrame, kImage }; + +// Similarity is checked against OpenCV results always, and due to differences +// on how OpenCV and GL treats pixels there are two thresholds. +// TODO: update to have just one threshold when OpenCV +// implementation is updated. +struct SimilarityConfig { + double threshold_on_cpu; + double threshold_on_gpu; +}; + +void RunTest(cv::Mat input, cv::Mat expected_result, + const SimilarityConfig& similarity, std::array matrix, + int out_width, int out_height, + absl::optional border_mode) { + RunTest(R"( + input_stream: "input_image" + input_stream: "output_size" + input_stream: "matrix" + node { + calculator: "WarpAffineCalculatorCpu" + input_stream: "IMAGE:input_image" + input_stream: "MATRIX:matrix" + input_stream: "OUTPUT_SIZE:output_size" + output_stream: "IMAGE:output_image" + options { + [mediapipe.WarpAffineCalculatorOptions.ext] { + $0 # border mode + } + } + } + )", + "cpu", input, expected_result, similarity.threshold_on_cpu, matrix, + out_width, out_height, border_mode); + + RunTest(R"( + input_stream: "input_image" + input_stream: "output_size" + input_stream: "matrix" + node { + calculator: "ToImageCalculator" + input_stream: "IMAGE_CPU:input_image" + output_stream: "IMAGE:input_image_unified" + } + node { + calculator: "WarpAffineCalculator" + input_stream: "IMAGE:input_image_unified" + input_stream: "MATRIX:matrix" + input_stream: "OUTPUT_SIZE:output_size" + output_stream: "IMAGE:output_image_unified" + options { + [mediapipe.WarpAffineCalculatorOptions.ext] { + $0 # border mode + } + } + } + node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:output_image_unified" + output_stream: "IMAGE_CPU:output_image" + } + )", + "cpu_image", input, expected_result, similarity.threshold_on_cpu, + matrix, out_width, out_height, border_mode); + + RunTest(R"( + input_stream: "input_image" + input_stream: "output_size" + input_stream: "matrix" + node { + calculator: "ImageFrameToGpuBufferCalculator" + input_stream: "input_image" + output_stream: "input_image_gpu" + } + node { + calculator: "WarpAffineCalculatorGpu" + input_stream: "IMAGE:input_image_gpu" + input_stream: "MATRIX:matrix" + input_stream: "OUTPUT_SIZE:output_size" + 
output_stream: "IMAGE:output_image_gpu" + options { + [mediapipe.WarpAffineCalculatorOptions.ext] { + $0 # border mode + gpu_origin: TOP_LEFT + } + } + } + node { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "output_image_gpu" + output_stream: "output_image" + } + )", + "gpu", input, expected_result, similarity.threshold_on_gpu, matrix, + out_width, out_height, border_mode); + + RunTest(R"( + input_stream: "input_image" + input_stream: "output_size" + input_stream: "matrix" + node { + calculator: "ImageFrameToGpuBufferCalculator" + input_stream: "input_image" + output_stream: "input_image_gpu" + } + node { + calculator: "ToImageCalculator" + input_stream: "IMAGE_GPU:input_image_gpu" + output_stream: "IMAGE:input_image_unified" + } + node { + calculator: "WarpAffineCalculator" + input_stream: "IMAGE:input_image_unified" + input_stream: "MATRIX:matrix" + input_stream: "OUTPUT_SIZE:output_size" + output_stream: "IMAGE:output_image_unified" + options { + [mediapipe.WarpAffineCalculatorOptions.ext] { + $0 # border mode + gpu_origin: TOP_LEFT + } + } + } + node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:output_image_unified" + output_stream: "IMAGE_GPU:output_image_gpu" + } + node { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "output_image_gpu" + output_stream: "output_image" + } + )", + "gpu_image", input, expected_result, similarity.threshold_on_gpu, + matrix, out_width, out_height, border_mode); +} + +std::array GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi, + bool keep_aspect_ratio, int out_width, + int out_height) { + std::array transform_mat; + mediapipe::RotatedRect roi_absolute = + mediapipe::GetRoi(input.cols, input.rows, roi); + mediapipe::PadRoi(out_width, out_height, keep_aspect_ratio, &roi_absolute) + .IgnoreError(); + mediapipe::GetRotatedSubRectToRectTransformMatrix( + roi_absolute, input.cols, input.rows, + /*flip_horizontaly=*/false, &transform_mat); + return transform_mat; +} + +TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspect) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.65f); + roi.set_y_center(0.4f); + roi.set_width(0.5f); + roi.set_height(0.5f); + roi.set_rotation(0); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"); + int out_width = 256; + int out_height = 256; + bool keep_aspect_ratio = true; + std::optional border_mode = {}; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.82}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.65f); + roi.set_y_center(0.4f); + roi.set_width(0.5f); + roi.set_height(0.5f); + roi.set_rotation(0); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + "medium_sub_rect_keep_aspect_border_zero.png"); + int out_width = 256; + int out_height = 256; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, 
out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotation) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.65f); + roi.set_y_center(0.4f); + roi.set_width(0.5f); + roi.set_height(0.5f); + roi.set_rotation(M_PI * 90.0f / 180.0f); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + "medium_sub_rect_keep_aspect_with_rotation.png"); + int out_width = 256; + int out_height = 256; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kReplicate; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.77}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, MediumSubRectKeepAspectWithRotationBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.65f); + roi.set_y_center(0.4f); + roi.set_width(0.5f); + roi.set_height(0.5f); + roi.set_rotation(M_PI * 90.0f / 180.0f); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + "medium_sub_rect_keep_aspect_with_rotation_border_zero.png"); + int out_width = 256; + int out_height = 256; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.75}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, MediumSubRectWithRotation) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.65f); + roi.set_y_center(0.4f); + roi.set_width(0.5f); + roi.set_height(0.5f); + roi.set_rotation(M_PI * -45.0f / 180.0f); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"); + int out_width = 256; + int out_height = 256; + bool keep_aspect_ratio = false; + std::optional border_mode = + AffineTransformation::BorderMode::kReplicate; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.81}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, MediumSubRectWithRotationBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.65f); + roi.set_y_center(0.4f); + roi.set_width(0.5f); + roi.set_height(0.5f); + roi.set_rotation(M_PI * -45.0f / 180.0f); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + "medium_sub_rect_with_rotation_border_zero.png"); + int out_width = 256; + int out_height = 256; + bool keep_aspect_ratio = false; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.80}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, LargeSubRect) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + 
roi.set_y_center(0.5f); + roi.set_width(1.5f); + roi.set_height(1.1f); + roi.set_rotation(0); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/large_sub_rect.png"); + int out_width = 128; + int out_height = 128; + bool keep_aspect_ratio = false; + std::optional border_mode = + AffineTransformation::BorderMode::kReplicate; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.95}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, LargeSubRectBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.5f); + roi.set_height(1.1f); + roi.set_rotation(0); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png"); + int out_width = 128; + int out_height = 128; + bool keep_aspect_ratio = false; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.92}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspect) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.5f); + roi.set_height(1.1f); + roi.set_rotation(0); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"); + int out_width = 128; + int out_height = 128; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kReplicate; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.5f); + roi.set_height(1.1f); + roi.set_rotation(0); + auto input = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgb( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + "large_sub_rect_keep_aspect_border_zero.png"); + int out_width = 128; + int out_height = 128; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.97}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotation) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.5f); + roi.set_height(1.1f); + roi.set_rotation(M_PI * -15.0f / 180.0f); + auto input = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + 
"large_sub_rect_keep_aspect_with_rotation.png"); + int out_width = 128; + int out_height = 128; + bool keep_aspect_ratio = true; + std::optional border_mode = {}; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.91}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, LargeSubRectKeepAspectWithRotationBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.5f); + roi.set_height(1.1f); + roi.set_rotation(M_PI * -15.0f / 180.0f); + auto input = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/" + "large_sub_rect_keep_aspect_with_rotation_border_zero.png"); + int out_width = 128; + int out_height = 128; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.88}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, NoOp) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.0f); + roi.set_height(1.0f); + roi.set_rotation(0); + auto input = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/noop_except_range.png"); + int out_width = 64; + int out_height = 128; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kReplicate; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +TEST(WarpAffineCalculatorTest, NoOpBorderZero) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.0f); + roi.set_height(1.0f); + roi.set_rotation(0); + auto input = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/input.jpg"); + auto expected_output = GetRgba( + "/mediapipe/calculators/" + "tensor/testdata/image_to_tensor/noop_except_range.png"); + int out_width = 64; + int out_height = 128; + bool keep_aspect_ratio = true; + std::optional border_mode = + AffineTransformation::BorderMode::kZero; + RunTest(input, expected_output, + {.threshold_on_cpu = 0.99, .threshold_on_gpu = 0.99}, + GetMatrix(input, roi, keep_aspect_ratio, out_width, out_height), + out_width, out_height, border_mode); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD index 71be05f6c..67273580d 100644 --- a/mediapipe/calculators/tensor/BUILD +++ b/mediapipe/calculators/tensor/BUILD @@ -26,6 +26,11 @@ licenses(["notice"]) package(default_visibility = ["//visibility:private"]) +exports_files( + glob(["testdata/image_to_tensor/*"]), + visibility = ["//mediapipe/calculators/image:__subpackages__"], +) + selects.config_setting_group( name = "compute_shader_unavailable", match_any = [ diff --git a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc index 5c22d734b..b579f0474 100644 --- 
a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc @@ -87,9 +87,9 @@ using GpuBuffer = mediapipe::GpuBuffer; // TENSORS - std::vector // Vector containing a single Tensor populated with an extrated RGB image. // MATRIX - std::array @Optional -// An std::array representing a 4x4 row-major-order matrix which -// can be used to map a point on the output tensor to a point on the input -// image. +// An std::array representing a 4x4 row-major-order matrix that +// maps a point on the input image to a point on the output tensor, and +// can be used to reverse the mapping by inverting the matrix. // LETTERBOX_PADDING - std::array @Optional // An std::array representing the letterbox padding from the 4 // sides ([left, top, right, bottom]) of the output image, normalized to diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.cc b/mediapipe/calculators/tensor/tensor_converter_calculator.cc index 82180fe52..f3c7c7b09 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.cc @@ -517,8 +517,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { uniform sampler2D frame; void main() { - $1 // flip - vec4 pixel = texture2D(frame, sample_coordinate); + vec2 coord = $1 + vec4 pixel = texture2D(frame, coord); $2 // normalize [-1,1] fragColor.r = pixel.r; // r channel $3 // g & b channels @@ -526,8 +526,9 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { })", /*$0=*/single_channel ? "vec1" : "vec4", /*$1=*/ - flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;" - : "", + flip_vertically_ + ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);" + : "sample_coordinate;", /*$2=*/output_range_.has_value() ? 
absl::Substitute("pixel = pixel * float($0) + float($1);", (output_range_->second - output_range_->first), diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index edc208987..ac058610a 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -587,9 +587,21 @@ cc_library( "//mediapipe/framework/port:ret_check", ] + select({ "//conditions:default": [ - "//mediapipe/framework/port:file_helpers", ], - }), + "//mediapipe:android": [], + }) + select( + { + "//conditions:default": [ + ], + }, + ) + select( + { + "//conditions:default": [ + ], + "//mediapipe:android": [ + ], + }, + ), alwayslink = 1, ) diff --git a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc index 5d8008a29..3991f645d 100644 --- a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc +++ b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc @@ -37,6 +37,7 @@ const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE"; const char kImageTag[] = "IMAGE"; const char kFloatContextFeaturePrefixTag[] = "FLOAT_CONTEXT_FEATURE_"; const char kFloatFeaturePrefixTag[] = "FLOAT_FEATURE_"; +const char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_"; const char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED"; const char kBBoxTag[] = "BBOX"; const char kKeypointsTag[] = "KEYPOINTS"; @@ -153,6 +154,9 @@ class PackMediaSequenceCalculator : public CalculatorBase { if (absl::StartsWith(tag, kFloatFeaturePrefixTag)) { cc->Inputs().Tag(tag).Set>(); } + if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) { + cc->Inputs().Tag(tag).Set>(); + } } CHECK(cc->Outputs().HasTag(kSequenceExampleTag) || @@ -231,6 +235,13 @@ class PackMediaSequenceCalculator : public CalculatorBase { mpms::ClearFeatureFloats(key, sequence_.get()); mpms::ClearFeatureTimestamp(key, sequence_.get()); } + if (absl::StartsWith(tag, kBytesFeaturePrefixTag)) { + std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) / + sizeof(*kBytesFeaturePrefixTag) - + 1); + mpms::ClearFeatureBytes(key, sequence_.get()); + mpms::ClearFeatureTimestamp(key, sequence_.get()); + } if (absl::StartsWith(tag, kKeypointsTag)) { std::string key = tag.substr(sizeof(kKeypointsTag) / sizeof(*kKeypointsTag) - 1); @@ -405,6 +416,17 @@ class PackMediaSequenceCalculator : public CalculatorBase { cc->Inputs().Tag(tag).Get>(), sequence_.get()); } + if (absl::StartsWith(tag, kBytesFeaturePrefixTag) && + !cc->Inputs().Tag(tag).IsEmpty()) { + std::string key = tag.substr(sizeof(kBytesFeaturePrefixTag) / + sizeof(*kBytesFeaturePrefixTag) - + 1); + mpms::AddFeatureTimestamp(key, cc->InputTimestamp().Value(), + sequence_.get()); + mpms::AddFeatureBytes( + key, cc->Inputs().Tag(tag).Get>(), + sequence_.get()); + } if (absl::StartsWith(tag, kBBoxTag) && !cc->Inputs().Tag(tag).IsEmpty()) { std::string key = ""; if (tag != kBBoxTag) { diff --git a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc index ffe8b5272..b39a0bac0 100644 --- a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc +++ b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator_test.cc @@ -49,6 +49,8 @@ constexpr char kKeypointsTestTag[] = "KEYPOINTS_TEST"; constexpr char kBboxPredictedTag[] = "BBOX_PREDICTED"; constexpr char kAudioOtherTag[] = "AUDIO_OTHER"; constexpr char kAudioTestTag[] = "AUDIO_TEST"; +constexpr char 
kBytesFeatureOtherTag[] = "BYTES_FEATURE_OTHER"; +constexpr char kBytesFeatureTestTag[] = "BYTES_FEATURE_TEST"; constexpr char kForwardFlowEncodedTag[] = "FORWARD_FLOW_ENCODED"; constexpr char kFloatContextFeatureOtherTag[] = "FLOAT_CONTEXT_FEATURE_OTHER"; constexpr char kFloatContextFeatureTestTag[] = "FLOAT_CONTEXT_FEATURE_TEST"; @@ -215,6 +217,54 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoFloatLists) { } } +TEST_F(PackMediaSequenceCalculatorTest, PacksTwoBytesLists) { + SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {}, + false, true); + auto input_sequence = ::absl::make_unique(); + + int num_timesteps = 2; + for (int i = 0; i < num_timesteps; ++i) { + auto vs_ptr = ::absl::make_unique>( + 2, absl::StrCat("foo", 2 << i)); + runner_->MutableInputs() + ->Tag(kBytesFeatureTestTag) + .packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i))); + vs_ptr = ::absl::make_unique>( + 2, absl::StrCat("bar", 2 << i)); + runner_->MutableInputs() + ->Tag(kBytesFeatureOtherTag) + .packets.push_back(Adopt(vs_ptr.release()).At(Timestamp(i))); + } + + runner_->MutableSidePackets()->Tag(kSequenceExampleTag) = + Adopt(input_sequence.release()); + + MP_ASSERT_OK(runner_->Run()); + + const std::vector& output_packets = + runner_->Outputs().Tag(kSequenceExampleTag).packets; + ASSERT_EQ(1, output_packets.size()); + const tf::SequenceExample& output_sequence = + output_packets[0].Get(); + + ASSERT_EQ(num_timesteps, + mpms::GetFeatureTimestampSize("TEST", output_sequence)); + ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", output_sequence)); + ASSERT_EQ(num_timesteps, + mpms::GetFeatureTimestampSize("OTHER", output_sequence)); + ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", output_sequence)); + for (int i = 0; i < num_timesteps; ++i) { + ASSERT_EQ(i, mpms::GetFeatureTimestampAt("TEST", output_sequence, i)); + ASSERT_THAT(mpms::GetFeatureBytesAt("TEST", output_sequence, i), + ::testing::ElementsAreArray( + std::vector(2, absl::StrCat("foo", 2 << i)))); + ASSERT_EQ(i, mpms::GetFeatureTimestampAt("OTHER", output_sequence, i)); + ASSERT_THAT(mpms::GetFeatureBytesAt("OTHER", output_sequence, i), + ::testing::ElementsAreArray( + std::vector(2, absl::StrCat("bar", 2 << i)))); + } +} + TEST_F(PackMediaSequenceCalculatorTest, OutputAsZeroTimestamp) { SetUpCalculator({"FLOAT_FEATURE_TEST:test"}, {}, false, true, true); auto input_sequence = ::absl::make_unique(); @@ -829,6 +879,45 @@ TEST_F(PackMediaSequenceCalculatorTest, TestReplacingFloatVectors) { ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence)); } +TEST_F(PackMediaSequenceCalculatorTest, TestReplacingBytesVectors) { + SetUpCalculator({"BYTES_FEATURE_TEST:test", "BYTES_FEATURE_OTHER:test2"}, {}, + false, true); + auto input_sequence = ::absl::make_unique(); + + int num_timesteps = 2; + for (int i = 0; i < num_timesteps; ++i) { + auto vs_ptr = ::absl::make_unique>( + 2, absl::StrCat("foo", 2 << i)); + mpms::AddFeatureBytes("TEST", *vs_ptr, input_sequence.get()); + mpms::AddFeatureTimestamp("TEST", i, input_sequence.get()); + vs_ptr = ::absl::make_unique>( + 2, absl::StrCat("bar", 2 << i)); + mpms::AddFeatureBytes("OTHER", *vs_ptr, input_sequence.get()); + mpms::AddFeatureTimestamp("OTHER", i, input_sequence.get()); + } + ASSERT_EQ(num_timesteps, + mpms::GetFeatureTimestampSize("TEST", *input_sequence)); + ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("TEST", *input_sequence)); + ASSERT_EQ(num_timesteps, + mpms::GetFeatureTimestampSize("OTHER", *input_sequence)); + 
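The BYTES_FEATURE_ handling added to PackMediaSequenceCalculator above derives the sequence-example feature key by stripping the stream-tag prefix; `sizeof(kBytesFeaturePrefixTag) / sizeof(*kBytesFeaturePrefixTag) - 1` is the prefix length without the trailing NUL. A minimal standalone sketch of that tag-to-key conversion (not part of the patch; `KeyFromTag` is an illustrative name, and only the prefix constant is copied from the calculator):

```cpp
#include <cassert>
#include <iostream>
#include <string>

// Prefix copied from the calculator; sizeof() includes the trailing '\0',
// hence the "- 1" when computing the number of characters to strip.
constexpr char kBytesFeaturePrefixTag[] = "BYTES_FEATURE_";

// Illustrative helper: returns the feature key for a "BYTES_FEATURE_<KEY>" tag.
std::string KeyFromTag(const std::string& tag) {
  return tag.substr(sizeof(kBytesFeaturePrefixTag) /
                        sizeof(*kBytesFeaturePrefixTag) -
                    1);
}

int main() {
  assert(KeyFromTag("BYTES_FEATURE_TEST") == "TEST");
  assert(KeyFromTag("BYTES_FEATURE_OTHER") == "OTHER");
  std::cout << KeyFromTag("BYTES_FEATURE_TEST") << "\n";  // Prints "TEST".
  return 0;
}
```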
ASSERT_EQ(num_timesteps, mpms::GetFeatureBytesSize("OTHER", *input_sequence)); + runner_->MutableSidePackets()->Tag(kSequenceExampleTag) = + Adopt(input_sequence.release()); + + MP_ASSERT_OK(runner_->Run()); + + const std::vector& output_packets = + runner_->Outputs().Tag(kSequenceExampleTag).packets; + ASSERT_EQ(1, output_packets.size()); + const tf::SequenceExample& output_sequence = + output_packets[0].Get(); + + ASSERT_EQ(0, mpms::GetFeatureTimestampSize("TEST", output_sequence)); + ASSERT_EQ(0, mpms::GetFeatureFloatsSize("TEST", output_sequence)); + ASSERT_EQ(0, mpms::GetFeatureTimestampSize("OTHER", output_sequence)); + ASSERT_EQ(0, mpms::GetFeatureFloatsSize("OTHER", output_sequence)); +} + TEST_F(PackMediaSequenceCalculatorTest, TestReconcilingAnnotations) { SetUpCalculator({"IMAGE:images"}, {}, false, true); auto input_sequence = ::absl::make_unique(); diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index 2d1037d20..55616bb83 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -162,6 +162,27 @@ selects.config_setting_group( ], ) +config_setting( + name = "edge_tpu_usb", + define_values = { + "MEDIAPIPE_EDGE_TPU": "usb", + }, +) + +config_setting( + name = "edge_tpu_pci", + define_values = { + "MEDIAPIPE_EDGE_TPU": "pci", + }, +) + +config_setting( + name = "edge_tpu_all", + define_values = { + "MEDIAPIPE_EDGE_TPU": "all", + }, +) + cc_library( name = "tflite_inference_calculator", srcs = ["tflite_inference_calculator.cc"], @@ -172,6 +193,12 @@ cc_library( ], "//conditions:default": [], }), + defines = select({ + "//conditions:default": [], + ":edge_tpu_usb": ["MEDIAPIPE_EDGE_TPU=usb"], + ":edge_tpu_pci": ["MEDIAPIPE_EDGE_TPU=pci"], + ":edge_tpu_all": ["MEDIAPIPE_EDGE_TPU=all"], + }), linkopts = select({ "//mediapipe:ios": [ "-framework CoreVideo", @@ -223,6 +250,20 @@ cc_library( "//conditions:default": [ "//mediapipe/util:cpu_util", ], + }) + select({ + "//conditions:default": [], + ":edge_tpu_usb": [ + "@libedgetpu//tflite/public:edgetpu", + "@libedgetpu//tflite/public:oss_edgetpu_direct_usb", + ], + ":edge_tpu_pci": [ + "@libedgetpu//tflite/public:edgetpu", + "@libedgetpu//tflite/public:oss_edgetpu_direct_pci", + ], + ":edge_tpu_all": [ + "@libedgetpu//tflite/public:edgetpu", + "@libedgetpu//tflite/public:oss_edgetpu_direct_all", + ], }), alwayslink = 1, ) diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 633300a73..8e83f3e44 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -85,7 +85,22 @@ constexpr char kTensorsGpuTag[] = "TENSORS_GPU"; } // namespace #if defined(MEDIAPIPE_EDGE_TPU) -#include "edgetpu.h" +#include "tflite/public/edgetpu.h" + +// Checkes whether model contains Edge TPU custom op or not. +bool ContainsEdgeTpuCustomOp(const tflite::FlatBufferModel& model) { + const auto* opcodes = model.GetModel()->operator_codes(); + for (const auto* subgraph : *model.GetModel()->subgraphs()) { + for (const auto* op : *subgraph->operators()) { + const auto* opcode = opcodes->Get(op->opcode_index()); + if (opcode->custom_code() && + opcode->custom_code()->str() == edgetpu::kCustomOp) { + return true; + } + } + } + return false; +} // Creates and returns an Edge TPU interpreter to run the given edgetpu model. 
std::unique_ptr BuildEdgeTpuInterpreter( @@ -94,14 +109,9 @@ std::unique_ptr BuildEdgeTpuInterpreter( edgetpu::EdgeTpuContext* edgetpu_context) { resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp()); std::unique_ptr interpreter; - if (tflite::InterpreterBuilder(model, *resolver)(&interpreter) != kTfLiteOk) { - std::cerr << "Failed to build edge TPU interpreter." << std::endl; - } + CHECK_EQ(tflite::InterpreterBuilder(model, *resolver)(&interpreter), + kTfLiteOk); interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context); - interpreter->SetNumThreads(1); - if (interpreter->AllocateTensors() != kTfLiteOk) { - std::cerr << "Failed to allocate edge TPU tensors." << std::endl; - } return interpreter; } #endif // MEDIAPIPE_EDGE_TPU @@ -279,8 +289,7 @@ class TfLiteInferenceCalculator : public CalculatorBase { #endif // MEDIAPIPE_TFLITE_GL_INFERENCE #if defined(MEDIAPIPE_EDGE_TPU) - std::shared_ptr edgetpu_context_ = - edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice(); + std::shared_ptr edgetpu_context_; #endif bool gpu_inference_ = false; @@ -303,6 +312,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // Calculator Core Section namespace { + +constexpr char kCustomOpResolverTag[] = "CUSTOM_OP_RESOLVER"; +constexpr char kModelTag[] = "MODEL"; + template bool ShouldUseGpu(CC* cc) { #if MEDIAPIPE_TFLITE_GPU_SUPPORTED @@ -327,7 +340,7 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) { const auto& options = cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); RET_CHECK(!options.model_path().empty() ^ - cc->InputSidePackets().HasTag("MODEL")) + cc->InputSidePackets().HasTag(kModelTag)) << "Either model as side packet or model path in options is required."; if (cc->Inputs().HasTag(kTensorsTag)) @@ -340,13 +353,13 @@ absl::Status TfLiteInferenceCalculator::GetContract(CalculatorContract* cc) { if (cc->Outputs().HasTag(kTensorsGpuTag)) cc->Outputs().Tag(kTensorsGpuTag).Set>(); - if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { + if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) { cc->InputSidePackets() - .Tag("CUSTOM_OP_RESOLVER") + .Tag(kCustomOpResolverTag) .Set(); } - if (cc->InputSidePackets().HasTag("MODEL")) { - cc->InputSidePackets().Tag("MODEL").Set(); + if (cc->InputSidePackets().HasTag(kModelTag)) { + cc->InputSidePackets().Tag(kModelTag).Set(); } if (ShouldUseGpu(cc)) { @@ -486,8 +499,8 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) { MP_RETURN_IF_ERROR(WriteKernelsToFile()); return RunInContextIfNeeded([this]() -> absl::Status { + interpreter_ = nullptr; if (delegate_) { - interpreter_ = nullptr; delegate_ = nullptr; #if MEDIAPIPE_TFLITE_GPU_SUPPORTED if (gpu_inference_) { @@ -501,7 +514,7 @@ absl::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) { #endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED } #if defined(MEDIAPIPE_EDGE_TPU) - edgetpu_context_.reset(); + edgetpu_context_ = nullptr; #endif return absl::OkStatus(); }); @@ -723,9 +736,9 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner( auto op_resolver_ptr = static_cast( &default_op_resolver); - if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { + if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) { op_resolver_ptr = &(cc->InputSidePackets() - .Tag("CUSTOM_OP_RESOLVER") + .Tag(kCustomOpResolverTag) .Get()); } @@ -825,21 +838,26 @@ absl::Status TfLiteInferenceCalculator::LoadModel(CalculatorContext* cc) { tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates default_op_resolver; - 
auto op_resolver_ptr = - static_cast( - &default_op_resolver); - - if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { - op_resolver_ptr = &(cc->InputSidePackets() - .Tag("CUSTOM_OP_RESOLVER") - .Get()); - } - #if defined(MEDIAPIPE_EDGE_TPU) - interpreter_ = - BuildEdgeTpuInterpreter(model, op_resolver_ptr, edgetpu_context_.get()); -#else - tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_); + if (ContainsEdgeTpuCustomOp(model)) { + edgetpu_context_ = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice(); + interpreter_ = BuildEdgeTpuInterpreter(model, &default_op_resolver, + edgetpu_context_.get()); + } else { +#endif // MEDIAPIPE_EDGE_TPU + auto op_resolver_ptr = + static_cast( + &default_op_resolver); + + if (cc->InputSidePackets().HasTag(kCustomOpResolverTag)) { + op_resolver_ptr = &(cc->InputSidePackets() + .Tag(kCustomOpResolverTag) + .Get()); + } + + tflite::InterpreterBuilder(model, *op_resolver_ptr)(&interpreter_); +#if defined(MEDIAPIPE_EDGE_TPU) + } #endif // MEDIAPIPE_EDGE_TPU RET_CHECK(interpreter_); @@ -872,8 +890,8 @@ absl::StatusOr TfLiteInferenceCalculator::GetModelAsPacket( if (!options.model_path().empty()) { return TfLiteModelLoader::LoadFromPath(options.model_path()); } - if (cc.InputSidePackets().HasTag("MODEL")) { - return cc.InputSidePackets().Tag("MODEL"); + if (cc.InputSidePackets().HasTag(kModelTag)) { + return cc.InputSidePackets().Tag(kModelTag); } return absl::Status(absl::StatusCode::kNotFound, "Must specify TFLite model as path or loaded model."); @@ -929,6 +947,8 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) { kTfLiteOk); return absl::OkStatus(); } +#else + (void)use_xnnpack; #endif // !EDGETPU // Return and use default tflite infernece (on CPU). No need for GPU diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index e759ff990..961cc620c 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -1353,3 +1353,34 @@ cc_test( "//mediapipe/framework/port:gtest_main", ], ) + +cc_library( + name = "inverse_matrix_calculator", + srcs = ["inverse_matrix_calculator.cc"], + hdrs = ["inverse_matrix_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/api2:node", + "//mediapipe/framework/api2:port", + "@com_google_absl//absl/status", + "@eigen_archive//:eigen3", + ], + alwayslink = True, +) + +cc_test( + name = "inverse_matrix_calculator_test", + srcs = ["inverse_matrix_calculator_test.cc"], + tags = ["desktop_only_test"], + deps = [ + ":inverse_matrix_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:parse_text_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) diff --git a/mediapipe/calculators/util/from_image_calculator.cc b/mediapipe/calculators/util/from_image_calculator.cc index 7484d9257..0ddb342eb 100644 --- a/mediapipe/calculators/util/from_image_calculator.cc +++ b/mediapipe/calculators/util/from_image_calculator.cc @@ -33,6 +33,7 @@ namespace { constexpr char kImageFrameTag[] = "IMAGE_CPU"; constexpr char kGpuBufferTag[] = "IMAGE_GPU"; constexpr char kImageTag[] = "IMAGE"; +constexpr char kSourceOnGpuTag[] = "SOURCE_ON_GPU"; } // namespace // A calculator for converting the unified image container into @@ -46,6 +47,8 @@ constexpr char kImageTag[] = 
"IMAGE"; // IMAGE_CPU: An ImageFrame containing output image. // IMAGE_GPU: A GpuBuffer containing output image. // +// SOURCE_ON_GPU: The source Image is stored on GPU or CPU. +// // Note: // Data is automatically transferred to/from the CPU or GPU // depending on output type. @@ -66,6 +69,7 @@ class FromImageCalculator : public CalculatorBase { absl::Status RenderGpu(CalculatorContext* cc); absl::Status RenderCpu(CalculatorContext* cc); + bool check_image_source_ = false; bool gpu_output_ = false; bool gpu_initialized_ = false; #if !MEDIAPIPE_DISABLE_GPU @@ -102,6 +106,9 @@ absl::Status FromImageCalculator::GetContract(CalculatorContract* cc) { #endif // !MEDIAPIPE_DISABLE_GPU } + if (cc->Outputs().HasTag(kSourceOnGpuTag)) { + cc->Outputs().Tag(kSourceOnGpuTag).Set(); + } return absl::OkStatus(); } @@ -111,7 +118,9 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) { if (cc->Outputs().HasTag(kGpuBufferTag)) { gpu_output_ = true; } - + if (cc->Outputs().HasTag(kSourceOnGpuTag)) { + check_image_source_ = true; + } if (gpu_output_) { #if !MEDIAPIPE_DISABLE_GPU MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); @@ -122,6 +131,13 @@ absl::Status FromImageCalculator::Open(CalculatorContext* cc) { } absl::Status FromImageCalculator::Process(CalculatorContext* cc) { + if (check_image_source_) { + auto& input = cc->Inputs().Tag(kImageTag).Get(); + cc->Outputs() + .Tag(kSourceOnGpuTag) + .AddPacket(MakePacket(input.UsesGpu()).At(cc->InputTimestamp())); + } + if (gpu_output_) { #if !MEDIAPIPE_DISABLE_GPU MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([&cc]() -> absl::Status { diff --git a/mediapipe/calculators/util/inverse_matrix_calculator.cc b/mediapipe/calculators/util/inverse_matrix_calculator.cc new file mode 100644 index 000000000..5809623c0 --- /dev/null +++ b/mediapipe/calculators/util/inverse_matrix_calculator.cc @@ -0,0 +1,50 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/calculators/util/inverse_matrix_calculator.h" + +#include "Eigen/Core" +#include "Eigen/Geometry" +#include "Eigen/LU" +#include "absl/status/status.h" +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/calculator_framework.h" + +namespace mediapipe { +namespace api2 { + +class InverseMatrixCalculatorImpl : public NodeImpl { + absl::Status Process(mediapipe::CalculatorContext* cc) override { + if (kInputMatrix(cc).IsEmpty()) { + return absl::OkStatus(); + } + Eigen::Matrix matrix( + kInputMatrix(cc).Get().data()); + + Eigen::Matrix inverse_matrix; + bool inverse_check; + matrix.computeInverseWithCheck(inverse_matrix, inverse_check); + RET_CHECK(inverse_check) << "Inverse matrix cannot be calculated."; + + std::array output; + Eigen::Map>( + output.data(), 4, 4) = inverse_matrix.matrix(); + kOutputMatrix(cc).Send(std::move(output)); + return absl::OkStatus(); + } +}; +MEDIAPIPE_NODE_IMPLEMENTATION(InverseMatrixCalculatorImpl); + +} // namespace api2 +} // namespace mediapipe diff --git a/mediapipe/calculators/util/inverse_matrix_calculator.h b/mediapipe/calculators/util/inverse_matrix_calculator.h new file mode 100644 index 000000000..ba1657348 --- /dev/null +++ b/mediapipe/calculators/util/inverse_matrix_calculator.h @@ -0,0 +1,51 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_ + +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/api2/port.h" + +namespace mediapipe { + +// Runs affine transformation. +// +// Input: +// MATRIX - std::array +// Row major 4x4 matrix to inverse. +// +// Output: +// MATRIX - std::array +// Row major 4x4 inversed matrix. 
+// +// Usage example: +// node { +// calculator: "dishti.aimatter.InverseMatrixCalculator" +// input_stream: "MATRIX:input_matrix" +// output_stream: "MATRIX:output_matrix" +// } +class InverseMatrixCalculator : public mediapipe::api2::NodeIntf { + public: + static constexpr mediapipe::api2::Input> kInputMatrix{ + "MATRIX"}; + static constexpr mediapipe::api2::Output> kOutputMatrix{ + "MATRIX"}; + MEDIAPIPE_NODE_INTERFACE(InverseMatrixCalculator, kInputMatrix, + kOutputMatrix); +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_UTIL_INVERSE_MATRIX_CALCULATOR_H_ diff --git a/mediapipe/calculators/util/inverse_matrix_calculator_test.cc b/mediapipe/calculators/util/inverse_matrix_calculator_test.cc new file mode 100644 index 000000000..d3b629c78 --- /dev/null +++ b/mediapipe/calculators/util/inverse_matrix_calculator_test.cc @@ -0,0 +1,126 @@ +#include "mediapipe/calculators/util/inverse_matrix_calculator.h" + +#include + +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" + +namespace mediapipe { +namespace { + +void RunTest(const std::array& matrix, + const std::array& expected_inverse_matrix) { + auto graph_config = mediapipe::ParseTextProtoOrDie( + R"pb( + input_stream: "matrix" + node { + calculator: "InverseMatrixCalculator" + input_stream: "MATRIX:matrix" + output_stream: "MATRIX:inverse_matrix" + } + )pb"); + + std::vector output_packets; + tool::AddVectorSink("inverse_matrix", &graph_config, &output_packets); + + // Run the graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + + MP_ASSERT_OK(graph.AddPacketToInputStream( + "matrix", + MakePacket>(std::move(matrix)).At(Timestamp(0)))); + + MP_ASSERT_OK(graph.WaitUntilIdle()); + ASSERT_THAT(output_packets, testing::SizeIs(1)); + + const auto& inverse_matrix = output_packets[0].Get>(); + + EXPECT_THAT(inverse_matrix, testing::Eq(expected_inverse_matrix)); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). 
+ MP_ASSERT_OK(graph.CloseInputStream("matrix")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST(InverseMatrixCalculatorTest, Identity) { + // clang-format off + std::array matrix = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + std::array expected_inverse_matrix = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + // clang-format on + RunTest(matrix, expected_inverse_matrix); +} + +TEST(InverseMatrixCalculatorTest, Translation) { + // clang-format off + std::array matrix = { + 1.0f, 0.0f, 0.0f, 2.0f, + 0.0f, 1.0f, 0.0f, -5.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + std::array expected_inverse_matrix = { + 1.0f, 0.0f, 0.0f, -2.0f, + 0.0f, 1.0f, 0.0f, 5.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + // clang-format on + RunTest(matrix, expected_inverse_matrix); +} + +TEST(InverseMatrixCalculatorTest, Scale) { + // clang-format off + std::array matrix = { + 5.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 2.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + std::array expected_inverse_matrix = { + 0.2f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.5f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + // clang-format on + RunTest(matrix, expected_inverse_matrix); +} + +TEST(InverseMatrixCalculatorTest, Rotation90) { + // clang-format off + std::array matrix = { + 0.0f, -1.0f, 0.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + std::array expected_inverse_matrix = { + 0.0f, 1.0f, 0.0f, 0.0f, + -1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, + }; + // clang-format on + RunTest(matrix, expected_inverse_matrix); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/examples/android/solutions/create_win_symlinks.bat b/mediapipe/examples/android/solutions/create_win_symlinks.bat new file mode 100644 index 000000000..ea641b6e9 --- /dev/null +++ b/mediapipe/examples/android/solutions/create_win_symlinks.bat @@ -0,0 +1,16 @@ +@rem Remove the current res dir symlinks that are for Linux and macOS and recreate res dir symlinks for Windows. +@rem This script needs administrator permission. Must run this script as administrator. + +@rem for hands example app. +cd /d %~dp0 +cd hands\src\main +rm res +mklink /d res ..\..\..\res + +@rem for facemesh example app. 
+cd /d %~dp0 +cd facemesh\src\main +rm res +mklink /d res ..\..\..\res +dir +pause diff --git a/mediapipe/examples/android/solutions/facemesh/build.gradle b/mediapipe/examples/android/solutions/facemesh/build.gradle new file mode 100644 index 000000000..74aedf095 --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/build.gradle @@ -0,0 +1,50 @@ +plugins { + id 'com.android.application' +} + +android { + compileSdkVersion 30 + buildToolsVersion "30.0.3" + + defaultConfig { + applicationId "com.google.mediapipe.apps.hands" + minSdkVersion 21 + targetSdkVersion 30 + versionCode 1 + versionName "1.0" + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } +} + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar']) + implementation 'androidx.appcompat:appcompat:1.3.0' + implementation 'com.google.android.material:material:1.3.0' + implementation 'androidx.constraintlayout:constraintlayout:2.0.4' + testImplementation 'junit:junit:4.+' + androidTestImplementation 'androidx.test.ext:junit:1.1.2' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' + // MediaPipe hands solution API and solution-core. + implementation 'com.google.mediapipe:solution-core:latest.release' + implementation 'com.google.mediapipe:facemesh:latest.release' + // MediaPipe deps + implementation 'com.google.flogger:flogger:latest.release' + implementation 'com.google.flogger:flogger-system-backend:latest.release' + implementation 'com.google.guava:guava:27.0.1-android' + implementation 'com.google.protobuf:protobuf-java:3.11.4' + // CameraX core library + def camerax_version = "1.0.0-beta10" + implementation "androidx.camera:camera-core:$camerax_version" + implementation "androidx.camera:camera-camera2:$camerax_version" + implementation "androidx.camera:camera-lifecycle:$camerax_version" +} diff --git a/mediapipe/examples/android/solutions/facemesh/proguard-rules.pro b/mediapipe/examples/android/solutions/facemesh/proguard-rules.pro new file mode 100644 index 000000000..f1b424510 --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile diff --git a/mediapipe/examples/android/solutions/facemesh/src/main/AndroidManifest.xml b/mediapipe/examples/android/solutions/facemesh/src/main/AndroidManifest.xml new file mode 100644 index 000000000..de062995a --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/examples/android/solutions/facemesh/src/main/BUILD b/mediapipe/examples/android/solutions/facemesh/src/main/BUILD new file mode 100644 index 000000000..591102c3e --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/src/main/BUILD @@ -0,0 +1,44 @@ +# Copyright 2021 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) + +package(default_visibility = ["//visibility:private"]) + +android_binary( + name = "facemesh", + srcs = glob(["**/*.java"]), + custom_package = "com.google.mediapipe.examples.facemesh", + manifest = "AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.examples.facemesh", + }, + multidex = "native", + resource_files = ["//mediapipe/examples/android/solutions:resource_files"], + deps = [ + "//mediapipe/framework/formats:landmark_java_proto_lite", + "//mediapipe/java/com/google/mediapipe/solutioncore:camera_input", + "//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib", + "//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering", + "//mediapipe/java/com/google/mediapipe/solutioncore:video_input", + "//mediapipe/java/com/google/mediapipe/solutions/facemesh", + "//third_party:androidx_appcompat", + "//third_party:androidx_constraint_layout", + "//third_party:opencv", + "@maven//:androidx_activity_activity", + "@maven//:androidx_concurrent_concurrent_futures", + "@maven//:androidx_fragment_fragment", + "@maven//:com_google_guava_guava", + ], +) diff --git a/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java b/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java new file mode 100644 index 000000000..fd6c533d3 --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultGlRenderer.java @@ -0,0 +1,186 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.mediapipe.examples.facemesh; + +import android.opengl.GLES20; +import android.opengl.Matrix; +import com.google.common.collect.ImmutableSet; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.solutioncore.ResultGlBoundary; +import com.google.mediapipe.solutioncore.ResultGlRenderer; +import com.google.mediapipe.solutions.facemesh.FaceMeshConnections; +import com.google.mediapipe.solutions.facemesh.FaceMeshResult; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.FloatBuffer; +import java.util.List; + +/** A custom implementation of {@link ResultGlRenderer} to render MediaPope FaceMesh results. */ +public class FaceMeshResultGlRenderer implements ResultGlRenderer { + private static final String TAG = "FaceMeshResultGlRenderer"; + + private static final float[] TESSELATION_COLOR = new float[] {0.75f, 0.75f, 0.75f, 0.5f}; + private static final int TESSELATION_THICKNESS = 5; + private static final float[] RIGHT_EYE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f}; + private static final int RIGHT_EYE_THICKNESS = 8; + private static final float[] RIGHT_EYEBROW_COLOR = new float[] {1f, 0.2f, 0.2f, 1f}; + private static final int RIGHT_EYEBROW_THICKNESS = 8; + private static final float[] LEFT_EYE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f}; + private static final int LEFT_EYE_THICKNESS = 8; + private static final float[] LEFT_EYEBROW_COLOR = new float[] {0.2f, 1f, 0.2f, 1f}; + private static final int LEFT_EYEBROW_THICKNESS = 8; + private static final float[] FACE_OVAL_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f}; + private static final int FACE_OVAL_THICKNESS = 8; + private static final float[] LIPS_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f}; + private static final int LIPS_THICKNESS = 8; + private static final String VERTEX_SHADER = + "uniform mat4 uTransformMatrix;\n" + + "attribute vec4 vPosition;\n" + + "void main() {\n" + + " gl_Position = uTransformMatrix * vPosition;\n" + + "}"; + private static final String FRAGMENT_SHADER = + "precision mediump float;\n" + + "uniform vec4 uColor;\n" + + "void main() {\n" + + " gl_FragColor = uColor;\n" + + "}"; + private int program; + private int positionHandle; + private int transformMatrixHandle; + private int colorHandle; + private final float[] transformMatrix = new float[16]; + + private int loadShader(int type, String shaderCode) { + int shader = GLES20.glCreateShader(type); + GLES20.glShaderSource(shader, shaderCode); + GLES20.glCompileShader(shader); + return shader; + } + + @Override + public void setupRendering() { + program = GLES20.glCreateProgram(); + int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER); + int fragmentShader = loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER); + GLES20.glAttachShader(program, vertexShader); + GLES20.glAttachShader(program, fragmentShader); + GLES20.glLinkProgram(program); + positionHandle = GLES20.glGetAttribLocation(program, "vPosition"); + transformMatrixHandle = GLES20.glGetUniformLocation(program, "uTransformMatrix"); + colorHandle = GLES20.glGetUniformLocation(program, "uColor"); + } + + @Override + public void renderResult(FaceMeshResult result, ResultGlBoundary boundary) { + if (result == null) { + return; + } + GLES20.glUseProgram(program); + // Sets the transform matrix to align the result rendering with the scaled output texture. 
+ // Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the + // bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the + // top-left corner. + Matrix.setIdentityM(transformMatrix, 0); + Matrix.scaleM( + transformMatrix, + 0, + 2 / (boundary.right() - boundary.left()), + -2 / (boundary.top() - boundary.bottom()), + 1.0f); + GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0); + + int numFaces = result.multiFaceLandmarks().size(); + for (int i = 0; i < numFaces; ++i) { + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_TESSELATION, + TESSELATION_COLOR, + TESSELATION_THICKNESS); + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_RIGHT_EYE, + RIGHT_EYE_COLOR, + RIGHT_EYE_THICKNESS); + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_RIGHT_EYEBROW, + RIGHT_EYEBROW_COLOR, + RIGHT_EYEBROW_THICKNESS); + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_LEFT_EYE, + LEFT_EYE_COLOR, + LEFT_EYE_THICKNESS); + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_LEFT_EYEBR0W, + LEFT_EYEBROW_COLOR, + LEFT_EYEBROW_THICKNESS); + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_FACE_OVAL, + FACE_OVAL_COLOR, + FACE_OVAL_THICKNESS); + drawLandmarks( + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_LIPS, + LIPS_COLOR, + LIPS_THICKNESS); + } + } + + /** + * Calls this to delete the shader program. + * + *
This is only necessary if one wants to release the program while keeping the context around. + */ + public void release() { + GLES20.glDeleteProgram(program); + } + + private void drawLandmarks( + List faceLandmarkList, + ImmutableSet connections, + float[] colorArray, + int thickness) { + GLES20.glUniform4fv(colorHandle, 1, colorArray, 0); + GLES20.glLineWidth(thickness); + for (FaceMeshConnections.Connection c : connections) { + float[] vertex = new float[4]; + NormalizedLandmark start = faceLandmarkList.get(c.start()); + vertex[0] = normalizedLandmarkValue(start.getX()); + vertex[1] = normalizedLandmarkValue(start.getY()); + NormalizedLandmark end = faceLandmarkList.get(c.end()); + vertex[2] = normalizedLandmarkValue(end.getX()); + vertex[3] = normalizedLandmarkValue(end.getY()); + FloatBuffer vertexBuffer = + ByteBuffer.allocateDirect(vertex.length * 4) + .order(ByteOrder.nativeOrder()) + .asFloatBuffer() + .put(vertex); + vertexBuffer.position(0); + GLES20.glEnableVertexAttribArray(positionHandle); + GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertexBuffer); + GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2); + } + } + + // Normalizes the value from the landmark value range:[0, 1] to the standard OpenGL coordinate + // value range: [-1, 1]. + private float normalizedLandmarkValue(float value) { + return value * 2 - 1; + } +} diff --git a/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java b/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java new file mode 100644 index 000000000..9db91a8e3 --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java @@ -0,0 +1,158 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.examples.facemesh; + +import android.content.Context; +import android.graphics.Bitmap; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.Matrix; +import android.graphics.Paint; +import androidx.appcompat.widget.AppCompatImageView; +import android.util.Size; +import com.google.common.collect.ImmutableSet; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.solutions.facemesh.FaceMeshConnections; +import com.google.mediapipe.solutions.facemesh.FaceMeshResult; +import java.util.List; + +/** An ImageView implementation for displaying MediaPipe FaceMesh results. 
*/ +public class FaceMeshResultImageView extends AppCompatImageView { + private static final String TAG = "FaceMeshResultImageView"; + + private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0"); + private static final int TESSELATION_THICKNESS = 5; + private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030"); + private static final int RIGHT_EYE_THICKNESS = 8; + private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030"); + private static final int RIGHT_EYEBROW_THICKNESS = 8; + private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30"); + private static final int LEFT_EYE_THICKNESS = 8; + private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30"); + private static final int LEFT_EYEBROW_THICKNESS = 8; + private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0"); + private static final int FACE_OVAL_THICKNESS = 8; + private static final int LIPS_COLOR = Color.parseColor("#E0E0E0"); + private static final int LIPS_THICKNESS = 8; + private Bitmap latest; + + public FaceMeshResultImageView(Context context) { + super(context); + setScaleType(AppCompatImageView.ScaleType.FIT_CENTER); + } + + /** + * Sets a {@link FaceMeshResult} to render. + * + * @param result a {@link FaceMeshResult} object that contains the solution outputs and the input + * {@link Bitmap}. + */ + public void setFaceMeshResult(FaceMeshResult result) { + if (result == null) { + return; + } + Bitmap bmInput = result.inputBitmap(); + int width = bmInput.getWidth(); + int height = bmInput.getHeight(); + latest = Bitmap.createBitmap(width, height, bmInput.getConfig()); + Canvas canvas = new Canvas(latest); + Size imageSize = new Size(width, height); + canvas.drawBitmap(bmInput, new Matrix(), null); + int numFaces = result.multiFaceLandmarks().size(); + for (int i = 0; i < numFaces; ++i) { + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_TESSELATION, + imageSize, + TESSELATION_COLOR, + TESSELATION_THICKNESS); + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_RIGHT_EYE, + imageSize, + RIGHT_EYE_COLOR, + RIGHT_EYE_THICKNESS); + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_RIGHT_EYEBROW, + imageSize, + RIGHT_EYEBROW_COLOR, + RIGHT_EYEBROW_THICKNESS); + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_LEFT_EYE, + imageSize, + LEFT_EYE_COLOR, + LEFT_EYE_THICKNESS); + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_LEFT_EYEBR0W, + imageSize, + LEFT_EYEBROW_COLOR, + LEFT_EYEBROW_THICKNESS); + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_FACE_OVAL, + imageSize, + FACE_OVAL_COLOR, + FACE_OVAL_THICKNESS); + drawLandmarksOnCanvas( + canvas, + result.multiFaceLandmarks().get(i).getLandmarkList(), + FaceMeshConnections.FACEMESH_LIPS, + imageSize, + LIPS_COLOR, + LIPS_THICKNESS); + } + } + + /** Updates the image view with the latest facemesh result. */ + public void update() { + postInvalidate(); + if (latest != null) { + setImageBitmap(latest); + } + } + + // TODO: Better hand landmark and hand connection drawing. 
+  private void drawLandmarksOnCanvas(
+      Canvas canvas,
+      List<NormalizedLandmark> faceLandmarkList,
+      ImmutableSet<FaceMeshConnections.Connection> connections,
+      Size imageSize,
+      int color,
+      int thickness) {
+    // Draw connections.
+    for (FaceMeshConnections.Connection c : connections) {
+      Paint connectionPaint = new Paint();
+      connectionPaint.setColor(color);
+      connectionPaint.setStrokeWidth(thickness);
+      NormalizedLandmark start = faceLandmarkList.get(c.start());
+      NormalizedLandmark end = faceLandmarkList.get(c.end());
+      canvas.drawLine(
+          start.getX() * imageSize.getWidth(),
+          start.getY() * imageSize.getHeight(),
+          end.getX() * imageSize.getWidth(),
+          end.getY() * imageSize.getHeight(),
+          connectionPaint);
+    }
+  }
+}
diff --git a/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/MainActivity.java b/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/MainActivity.java
new file mode 100644
index 000000000..27c89a93e
--- /dev/null
+++ b/mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/MainActivity.java
@@ -0,0 +1,308 @@
+// Copyright 2021 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.mediapipe.examples.facemesh;
+
+import android.content.Intent;
+import android.graphics.Bitmap;
+import android.os.Bundle;
+import android.provider.MediaStore;
+import androidx.appcompat.app.AppCompatActivity;
+import android.util.Log;
+import android.view.View;
+import android.widget.Button;
+import android.widget.FrameLayout;
+import androidx.activity.result.ActivityResultLauncher;
+import androidx.activity.result.contract.ActivityResultContracts;
+import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
+import com.google.mediapipe.solutioncore.CameraInput;
+import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
+import com.google.mediapipe.solutioncore.VideoInput;
+import com.google.mediapipe.solutions.facemesh.FaceMesh;
+import com.google.mediapipe.solutions.facemesh.FaceMeshOptions;
+import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
+import java.io.IOException;
+
+/** Main activity of MediaPipe FaceMesh app. */
+public class MainActivity extends AppCompatActivity {
+  private static final String TAG = "MainActivity";
+
+  private FaceMesh facemesh;
+  // Run the pipeline and the model inference on GPU or CPU.
+  private static final boolean RUN_ON_GPU = true;
+
+  private enum InputSource {
+    UNKNOWN,
+    IMAGE,
+    VIDEO,
+    CAMERA,
+  }
+  private InputSource inputSource = InputSource.UNKNOWN;
+  // Image demo UI and image loader components.
+  private ActivityResultLauncher<Intent> imageGetter;
+  private FaceMeshResultImageView imageView;
+  // Video demo UI and video loader components.
+  private VideoInput videoInput;
+  private ActivityResultLauncher<Intent> videoGetter;
+  // Live camera demo UI and camera components.
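The imageGetter and videoGetter fields above rely on the androidx Activity Result API, which is why androidx.activity and androidx.fragment artifacts appear in the solution BUILD dependencies later in this diff. A minimal sketch of the register-then-launch pattern, assuming androidx.activity 1.2+; PickMediaSketchActivity and its callback body are illustrative only:

```java
import android.content.Intent;
import android.net.Uri;
import android.os.Bundle;
import android.provider.MediaStore;
import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import androidx.appcompat.app.AppCompatActivity;

/** Illustrative only: the Activity Result API pattern used by the image and video getters. */
public class PickMediaSketchActivity extends AppCompatActivity {
  private ActivityResultLauncher<Intent> pickMedia;

  @Override
  protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    // Register the contract once, before the activity reaches the STARTED state.
    pickMedia =
        registerForActivityResult(
            new ActivityResultContracts.StartActivityForResult(),
            result -> {
              if (result.getResultCode() == RESULT_OK && result.getData() != null) {
                Uri mediaUri = result.getData().getData();
                // Hand mediaUri to the solution pipeline, e.g. decode a Bitmap or start VideoInput.
              }
            });
    // Launch on demand; the example apps do this from a button click listener.
    pickMedia.launch(new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI));
  }
}
```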
+ private CameraInput cameraInput; + private SolutionGlSurfaceView glSurfaceView; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + setupStaticImageDemoUiComponents(); + setupVideoDemoUiComponents(); + setupLiveDemoUiComponents(); + } + + @Override + protected void onResume() { + super.onResume(); + if (inputSource == InputSource.CAMERA) { + // Restarts the camera and the opengl surface rendering. + cameraInput = new CameraInput(this); + cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame)); + glSurfaceView.post(this::startCamera); + glSurfaceView.setVisibility(View.VISIBLE); + } else if (inputSource == InputSource.VIDEO) { + videoInput.resume(); + } + } + + @Override + protected void onPause() { + super.onPause(); + if (inputSource == InputSource.CAMERA) { + glSurfaceView.setVisibility(View.GONE); + cameraInput.close(); + } else if (inputSource == InputSource.VIDEO) { + videoInput.pause(); + } + } + + /** Sets up the UI components for the static image demo. */ + private void setupStaticImageDemoUiComponents() { + // The Intent to access gallery and read images as bitmap. + imageGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + Bitmap bitmap = null; + try { + bitmap = + MediaStore.Images.Media.getBitmap( + this.getContentResolver(), resultIntent.getData()); + } catch (IOException e) { + Log.e(TAG, "Bitmap reading error:" + e); + } + if (bitmap != null) { + facemesh.send(bitmap); + } + } + } + }); + Button loadImageButton = findViewById(R.id.button_load_picture); + loadImageButton.setOnClickListener( + v -> { + if (inputSource != InputSource.IMAGE) { + stopCurrentPipeline(); + setupStaticImageModePipeline(); + } + // Reads images from gallery. + Intent gallery = + new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI); + imageGetter.launch(gallery); + }); + imageView = new FaceMeshResultImageView(this); + } + + /** The core MediaPipe FaceMesh setup workflow for its static image mode. */ + private void setupStaticImageModePipeline() { + this.inputSource = InputSource.IMAGE; + // Initializes a new MediaPipe FaceMesh instance in the static image mode. + facemesh = + new FaceMesh( + this, + FaceMeshOptions.builder() + .setMode(FaceMeshOptions.STATIC_IMAGE_MODE) + .setRunOnGpu(RUN_ON_GPU) + .build()); + + // Connects MediaPipe FaceMesh to the user-defined FaceMeshResultImageView. + facemesh.setResultListener( + faceMeshResult -> { + logNoseLandmark(faceMeshResult, /*showPixelValues=*/ true); + imageView.setFaceMeshResult(faceMeshResult); + runOnUiThread(() -> imageView.update()); + }); + facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message)); + + // Updates the preview layout. + FrameLayout frameLayout = findViewById(R.id.preview_display_layout); + frameLayout.removeAllViewsInLayout(); + imageView.setImageDrawable(null); + frameLayout.addView(imageView); + imageView.setVisibility(View.VISIBLE); + } + + /** Sets up the UI components for the video demo. */ + private void setupVideoDemoUiComponents() { + // The Intent to access gallery and read a video file. 
+ videoGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + glSurfaceView.post( + () -> + videoInput.start( + this, + resultIntent.getData(), + facemesh.getGlContext(), + glSurfaceView.getWidth(), + glSurfaceView.getHeight())); + } + } + }); + Button loadVideoButton = findViewById(R.id.button_load_video); + loadVideoButton.setOnClickListener( + v -> { + stopCurrentPipeline(); + setupStreamingModePipeline(InputSource.VIDEO); + // Reads video from gallery. + Intent gallery = + new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI); + videoGetter.launch(gallery); + }); + } + + /** Sets up the UI components for the live demo with camera input. */ + private void setupLiveDemoUiComponents() { + Button startCameraButton = findViewById(R.id.button_start_camera); + startCameraButton.setOnClickListener( + v -> { + if (inputSource == InputSource.CAMERA) { + return; + } + stopCurrentPipeline(); + setupStreamingModePipeline(InputSource.CAMERA); + }); + } + + /** The core MediaPipe FaceMesh setup workflow for its streaming mode. */ + private void setupStreamingModePipeline(InputSource inputSource) { + this.inputSource = inputSource; + // Initializes a new MediaPipe FaceMesh instance in the streaming mode. + facemesh = + new FaceMesh( + this, + FaceMeshOptions.builder() + .setMode(FaceMeshOptions.STREAMING_MODE) + .setRunOnGpu(RUN_ON_GPU) + .build()); + facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message)); + + if (inputSource == InputSource.CAMERA) { + // Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh. + cameraInput = new CameraInput(this); + cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame)); + } else if (inputSource == InputSource.VIDEO) { + // Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh. + videoInput = new VideoInput(this); + videoInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame)); + } + + // Initializes a new Gl surface view with a user-defined FaceMeshResultGlRenderer. + glSurfaceView = + new SolutionGlSurfaceView<>(this, facemesh.getGlContext(), facemesh.getGlMajorVersion()); + glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer()); + glSurfaceView.setRenderInputImage(true); + facemesh.setResultListener( + faceMeshResult -> { + logNoseLandmark(faceMeshResult, /*showPixelValues=*/ false); + glSurfaceView.setRenderData(faceMeshResult); + glSurfaceView.requestRender(); + }); + + // The runnable to start camera after the gl surface view is attached. + // For video input source, videoInput.start() will be called when the video uri is available. + if (inputSource == InputSource.CAMERA) { + glSurfaceView.post(this::startCamera); + } + + // Updates the preview layout. 
+ FrameLayout frameLayout = findViewById(R.id.preview_display_layout); + imageView.setVisibility(View.GONE); + frameLayout.removeAllViewsInLayout(); + frameLayout.addView(glSurfaceView); + glSurfaceView.setVisibility(View.VISIBLE); + frameLayout.requestLayout(); + } + + private void startCamera() { + cameraInput.start( + this, + facemesh.getGlContext(), + CameraInput.CameraFacing.FRONT, + glSurfaceView.getWidth(), + glSurfaceView.getHeight()); + } + + private void stopCurrentPipeline() { + if (cameraInput != null) { + cameraInput.setNewFrameListener(null); + cameraInput.close(); + } + if (videoInput != null) { + videoInput.setNewFrameListener(null); + videoInput.close(); + } + if (glSurfaceView != null) { + glSurfaceView.setVisibility(View.GONE); + } + if (facemesh != null) { + facemesh.close(); + } + } + + private void logNoseLandmark(FaceMeshResult result, boolean showPixelValues) { + if (result == null || result.multiFaceLandmarks().isEmpty()) { + return; + } + NormalizedLandmark noseLandmark = result.multiFaceLandmarks().get(0).getLandmarkList().get(1); + // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates. + if (showPixelValues) { + int width = result.inputBitmap().getWidth(); + int height = result.inputBitmap().getHeight(); + Log.i( + TAG, + String.format( + "MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f", + noseLandmark.getX() * width, noseLandmark.getY() * height)); + } else { + Log.i( + TAG, + String.format( + "MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f", + noseLandmark.getX(), noseLandmark.getY())); + } + } +} diff --git a/mediapipe/examples/android/solutions/facemesh/src/main/res b/mediapipe/examples/android/solutions/facemesh/src/main/res new file mode 120000 index 000000000..fc8850136 --- /dev/null +++ b/mediapipe/examples/android/solutions/facemesh/src/main/res @@ -0,0 +1 @@ +../../../res \ No newline at end of file diff --git a/mediapipe/examples/android/solutions/hands/src/main/AndroidManifest.xml b/mediapipe/examples/android/solutions/hands/src/main/AndroidManifest.xml index 0d70af7ff..4537a2537 100644 --- a/mediapipe/examples/android/solutions/hands/src/main/AndroidManifest.xml +++ b/mediapipe/examples/android/solutions/hands/src/main/AndroidManifest.xml @@ -19,7 +19,8 @@ android:roundIcon="@mipmap/ic_launcher_round" android:supportsRtl="true" android:theme="@style/AppTheme"> - + diff --git a/mediapipe/examples/android/solutions/hands/src/main/BUILD b/mediapipe/examples/android/solutions/hands/src/main/BUILD index c4bb724e0..0d71e4a95 100644 --- a/mediapipe/examples/android/solutions/hands/src/main/BUILD +++ b/mediapipe/examples/android/solutions/hands/src/main/BUILD @@ -31,10 +31,14 @@ android_binary( "//mediapipe/java/com/google/mediapipe/solutioncore:camera_input", "//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib", "//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering", + "//mediapipe/java/com/google/mediapipe/solutioncore:video_input", "//mediapipe/java/com/google/mediapipe/solutions/hands", "//third_party:androidx_appcompat", "//third_party:androidx_constraint_layout", + "//third_party:opencv", + "@maven//:androidx_activity_activity", "@maven//:androidx_concurrent_concurrent_futures", + "@maven//:androidx_fragment_fragment", "@maven//:com_google_guava_guava", ], ) diff --git a/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java 
b/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java index ec61110ed..720ae5509 100644 --- a/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java +++ b/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultGlRenderer.java @@ -46,7 +46,6 @@ public class HandsResultGlRenderer implements ResultGlRenderer { private int positionHandle; private int transformMatrixHandle; private final float[] transformMatrix = new float[16]; - private FloatBuffer vertexBuffer; private int loadShader(int type, String shaderCode) { int shader = GLES20.glCreateShader(type); @@ -74,12 +73,15 @@ public class HandsResultGlRenderer implements ResultGlRenderer { } GLES20.glUseProgram(program); // Sets the transform matrix to align the result rendering with the scaled output texture. + // Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the + // bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the + // top-left corner. Matrix.setIdentityM(transformMatrix, 0); Matrix.scaleM( transformMatrix, 0, 2 / (boundary.right() - boundary.left()), - 2 / (boundary.top() - boundary.bottom()), + -2 / (boundary.top() - boundary.bottom()), 1.0f); GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0); GLES20.glLineWidth(CONNECTION_THICKNESS); @@ -109,7 +111,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer { NormalizedLandmark end = handLandmarkList.get(c.end()); vertex[2] = normalizedLandmarkValue(end.getX()); vertex[3] = normalizedLandmarkValue(end.getY()); - vertexBuffer = + FloatBuffer vertexBuffer = ByteBuffer.allocateDirect(vertex.length * 4) .order(ByteOrder.nativeOrder()) .asFloatBuffer() diff --git a/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java b/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java index 35dbc1848..d4052d4e9 100644 --- a/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java +++ b/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java @@ -20,7 +20,7 @@ import android.graphics.Canvas; import android.graphics.Color; import android.graphics.Matrix; import android.graphics.Paint; -import android.widget.ImageView; +import androidx.appcompat.widget.AppCompatImageView; import com.google.mediapipe.formats.proto.LandmarkProto; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.solutions.hands.Hands; @@ -28,17 +28,18 @@ import com.google.mediapipe.solutions.hands.HandsResult; import java.util.List; /** An ImageView implementation for displaying MediaPipe Hands results. 
*/ -public class HandsResultImageView extends ImageView { +public class HandsResultImageView extends AppCompatImageView { private static final String TAG = "HandsResultImageView"; private static final int LANDMARK_COLOR = Color.RED; private static final int LANDMARK_RADIUS = 15; private static final int CONNECTION_COLOR = Color.GREEN; private static final int CONNECTION_THICKNESS = 10; + private Bitmap latest; public HandsResultImageView(Context context) { super(context); - setScaleType(ImageView.ScaleType.FIT_CENTER); + setScaleType(AppCompatImageView.ScaleType.FIT_CENTER); } /** @@ -54,8 +55,8 @@ public class HandsResultImageView extends ImageView { Bitmap bmInput = result.inputBitmap(); int width = bmInput.getWidth(); int height = bmInput.getHeight(); - Bitmap bmOutput = Bitmap.createBitmap(width, height, bmInput.getConfig()); - Canvas canvas = new Canvas(bmOutput); + latest = Bitmap.createBitmap(width, height, bmInput.getConfig()); + Canvas canvas = new Canvas(latest); canvas.drawBitmap(bmInput, new Matrix(), null); int numHands = result.multiHandLandmarks().size(); @@ -63,8 +64,14 @@ public class HandsResultImageView extends ImageView { drawLandmarksOnCanvas( result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height); } + } + + /** Updates the image view with the latest hands result. */ + public void update() { postInvalidate(); - setImageBitmap(bmOutput); + if (latest != null) { + setImageBitmap(latest); + } } // TODO: Better hand landmark and hand connection drawing. diff --git a/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/MainActivity.java b/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/MainActivity.java index 8828c0240..379219942 100644 --- a/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/MainActivity.java +++ b/mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/MainActivity.java @@ -28,6 +28,7 @@ import androidx.activity.result.contract.ActivityResultContracts; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.solutioncore.CameraInput; import com.google.mediapipe.solutioncore.SolutionGlSurfaceView; +import com.google.mediapipe.solutioncore.VideoInput; import com.google.mediapipe.solutions.hands.HandLandmark; import com.google.mediapipe.solutions.hands.Hands; import com.google.mediapipe.solutions.hands.HandsOptions; @@ -39,14 +40,24 @@ public class MainActivity extends AppCompatActivity { private static final String TAG = "MainActivity"; private Hands hands; - private int mode = HandsOptions.STATIC_IMAGE_MODE; + // Run the pipeline and the model inference on GPU or CPU. + private static final boolean RUN_ON_GPU = true; + + private enum InputSource { + UNKNOWN, + IMAGE, + VIDEO, + CAMERA, + } + private InputSource inputSource = InputSource.UNKNOWN; + // Image demo UI and image loader components. - private Button loadImageButton; private ActivityResultLauncher imageGetter; private HandsResultImageView imageView; - + // Video demo UI and video loader components. + private VideoInput videoInput; + private ActivityResultLauncher videoGetter; // Live camera demo UI and camera components. 
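The HandsResultImageView change above (mirrored by FaceMeshResultImageView) separates drawing from publishing, presumably so the Canvas rasterization can stay on the solution's callback thread and only the cheap setImageBitmap() call runs on the UI thread. A sketch of the intended call pattern; the wire() helper is illustrative, and the listener body matches the MainActivity changes below:

```java
import androidx.appcompat.app.AppCompatActivity;
import com.google.mediapipe.examples.hands.HandsResultImageView;
import com.google.mediapipe.solutions.hands.Hands;

/** Illustrative only: draw off the UI thread, publish on the UI thread. */
final class ResultPublishingSketch {
  private ResultPublishingSketch() {}

  static void wire(AppCompatActivity activity, Hands hands, HandsResultImageView imageView) {
    hands.setResultListener(
        handsResult -> {
          // Invoked on a MediaPipe worker thread: rasterize landmarks into the off-screen bitmap.
          imageView.setHandsResult(handsResult);
          // Post only the bitmap swap to the UI thread.
          activity.runOnUiThread(imageView::update);
        });
  }
}
```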
- private Button startCameraButton; private CameraInput cameraInput; private SolutionGlSurfaceView glSurfaceView; @@ -55,26 +66,32 @@ public class MainActivity extends AppCompatActivity { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); setupStaticImageDemoUiComponents(); + setupVideoDemoUiComponents(); setupLiveDemoUiComponents(); } @Override protected void onResume() { super.onResume(); - if (mode == HandsOptions.STREAMING_MODE) { + if (inputSource == InputSource.CAMERA) { // Restarts the camera and the opengl surface rendering. cameraInput = new CameraInput(this); - cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame)); + cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame)); glSurfaceView.post(this::startCamera); glSurfaceView.setVisibility(View.VISIBLE); + } else if (inputSource == InputSource.VIDEO) { + videoInput.resume(); } } @Override protected void onPause() { super.onPause(); - if (mode == HandsOptions.STREAMING_MODE) { - stopLiveDemo(); + if (inputSource == InputSource.CAMERA) { + glSurfaceView.setVisibility(View.GONE); + cameraInput.close(); + } else if (inputSource == InputSource.VIDEO) { + videoInput.pause(); } } @@ -102,80 +119,122 @@ public class MainActivity extends AppCompatActivity { } } }); - loadImageButton = (Button) findViewById(R.id.button_load_picture); + Button loadImageButton = findViewById(R.id.button_load_picture); loadImageButton.setOnClickListener( - new View.OnClickListener() { - @Override - public void onClick(View v) { - if (mode == HandsOptions.STREAMING_MODE) { - stopLiveDemo(); - } - if (hands == null || mode != HandsOptions.STATIC_IMAGE_MODE) { - setupStaticImageModePipeline(); - } - // Reads images from gallery. - Intent gallery = - new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI); - imageGetter.launch(gallery); + v -> { + if (inputSource != InputSource.IMAGE) { + stopCurrentPipeline(); + setupStaticImageModePipeline(); } + // Reads images from gallery. + Intent gallery = + new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI); + imageGetter.launch(gallery); }); imageView = new HandsResultImageView(this); } /** The core MediaPipe Hands setup workflow for its static image mode. */ private void setupStaticImageModePipeline() { + this.inputSource = InputSource.IMAGE; // Initializes a new MediaPipe Hands instance in the static image mode. - mode = HandsOptions.STATIC_IMAGE_MODE; - if (hands != null) { - hands.close(); - } - hands = new Hands(this, HandsOptions.builder().setMode(mode).build()); + hands = + new Hands( + this, + HandsOptions.builder() + .setMode(HandsOptions.STATIC_IMAGE_MODE) + .setMaxNumHands(1) + .setRunOnGpu(RUN_ON_GPU) + .build()); // Connects MediaPipe Hands to the user-defined HandsResultImageView. hands.setResultListener( handsResult -> { logWristLandmark(handsResult, /*showPixelValues=*/ true); - runOnUiThread(() -> imageView.setHandsResult(handsResult)); + imageView.setHandsResult(handsResult); + runOnUiThread(() -> imageView.update()); }); - hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message)); + hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message)); // Updates the preview layout. 
- FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout); + FrameLayout frameLayout = findViewById(R.id.preview_display_layout); frameLayout.removeAllViewsInLayout(); imageView.setImageDrawable(null); frameLayout.addView(imageView); imageView.setVisibility(View.VISIBLE); } + /** Sets up the UI components for the video demo. */ + private void setupVideoDemoUiComponents() { + // The Intent to access gallery and read a video file. + videoGetter = + registerForActivityResult( + new ActivityResultContracts.StartActivityForResult(), + result -> { + Intent resultIntent = result.getData(); + if (resultIntent != null) { + if (result.getResultCode() == RESULT_OK) { + glSurfaceView.post( + () -> + videoInput.start( + this, + resultIntent.getData(), + hands.getGlContext(), + glSurfaceView.getWidth(), + glSurfaceView.getHeight())); + } + } + }); + Button loadVideoButton = findViewById(R.id.button_load_video); + loadVideoButton.setOnClickListener( + v -> { + stopCurrentPipeline(); + setupStreamingModePipeline(InputSource.VIDEO); + // Reads video from gallery. + Intent gallery = + new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI); + videoGetter.launch(gallery); + }); + } + /** Sets up the UI components for the live demo with camera input. */ private void setupLiveDemoUiComponents() { - startCameraButton = (Button) findViewById(R.id.button_start_camera); + Button startCameraButton = findViewById(R.id.button_start_camera); startCameraButton.setOnClickListener( - new View.OnClickListener() { - @Override - public void onClick(View v) { - if (hands == null || mode != HandsOptions.STREAMING_MODE) { - setupStreamingModePipeline(); - } + v -> { + if (inputSource == InputSource.CAMERA) { + return; } + stopCurrentPipeline(); + setupStreamingModePipeline(InputSource.CAMERA); }); } /** The core MediaPipe Hands setup workflow for its streaming mode. */ - private void setupStreamingModePipeline() { + private void setupStreamingModePipeline(InputSource inputSource) { + this.inputSource = inputSource; // Initializes a new MediaPipe Hands instance in the streaming mode. - mode = HandsOptions.STREAMING_MODE; - if (hands != null) { - hands.close(); + hands = + new Hands( + this, + HandsOptions.builder() + .setMode(HandsOptions.STREAMING_MODE) + .setMaxNumHands(1) + .setRunOnGpu(RUN_ON_GPU) + .build()); + hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message)); + + if (inputSource == InputSource.CAMERA) { + // Initializes a new CameraInput instance and connects it to MediaPipe Hands. + cameraInput = new CameraInput(this); + cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame)); + } else if (inputSource == InputSource.VIDEO) { + // Initializes a new VideoInput instance and connects it to MediaPipe Hands. + videoInput = new VideoInput(this); + videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame)); } - hands = new Hands(this, HandsOptions.builder().setMode(mode).build()); - hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe hands error:" + message)); - // Initializes a new CameraInput instance and connects it to MediaPipe Hands. - cameraInput = new CameraInput(this); - cameraInput.setCameraNewFrameListener(textureFrame -> hands.send(textureFrame)); - - // Initalizes a new Gl surface view with a user-defined HandsResultGlRenderer. + // Initializes a new Gl surface view with a user-defined HandsResultGlRenderer. 
glSurfaceView = new SolutionGlSurfaceView<>(this, hands.getGlContext(), hands.getGlMajorVersion()); glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer()); @@ -188,10 +247,13 @@ public class MainActivity extends AppCompatActivity { }); // The runnable to start camera after the gl surface view is attached. - glSurfaceView.post(this::startCamera); + // For video input source, videoInput.start() will be called when the video uri is available. + if (inputSource == InputSource.CAMERA) { + glSurfaceView.post(this::startCamera); + } // Updates the preview layout. - FrameLayout frameLayout = (FrameLayout) findViewById(R.id.preview_display_layout); + FrameLayout frameLayout = findViewById(R.id.preview_display_layout); imageView.setVisibility(View.GONE); frameLayout.removeAllViewsInLayout(); frameLayout.addView(glSurfaceView); @@ -208,34 +270,40 @@ public class MainActivity extends AppCompatActivity { glSurfaceView.getHeight()); } - private void stopLiveDemo() { + private void stopCurrentPipeline() { if (cameraInput != null) { - cameraInput.stop(); + cameraInput.setNewFrameListener(null); + cameraInput.close(); + } + if (videoInput != null) { + videoInput.setNewFrameListener(null); + videoInput.close(); } if (glSurfaceView != null) { glSurfaceView.setVisibility(View.GONE); } + if (hands != null) { + hands.close(); + } } private void logWristLandmark(HandsResult result, boolean showPixelValues) { NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST); - // For Bitmaps, show the pixel values. For texture inputs, show the normoralized cooridanates. + // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates. if (showPixelValues) { int width = result.inputBitmap().getWidth(); int height = result.inputBitmap().getHeight(); Log.i( TAG, - "MediaPipe Hand wrist coordinates (pixel values): x= " - + wristLandmark.getX() * width - + " y=" - + wristLandmark.getY() * height); + String.format( + "MediaPipe Hand wrist coordinates (pixel values): x=%f, y=%f", + wristLandmark.getX() * width, wristLandmark.getY() * height)); } else { Log.i( TAG, - "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x= " - + wristLandmark.getX() - + " y=" - + wristLandmark.getY()); + String.format( + "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f", + wristLandmark.getX(), wristLandmark.getY())); } } } diff --git a/mediapipe/examples/android/solutions/res/layout/activity_main.xml b/mediapipe/examples/android/solutions/res/layout/activity_main.xml index e14f12871..834e9a3e6 100644 --- a/mediapipe/examples/android/solutions/res/layout/activity_main.xml +++ b/mediapipe/examples/android/solutions/res/layout/activity_main.xml @@ -8,18 +8,23 @@ android:id="@+id/buttons" android:layout_width="match_parent" android:layout_height="wrap_content" - android:gravity="center" + style="?android:attr/buttonBarStyle" android:gravity="center" android:orientation="horizontal">
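One note on the HandsResultGlRenderer change earlier in this diff: the Y scale factor is negated because landmark coordinates use a top-left origin with y growing downward, while OpenGL clip space uses a bottom-left origin with y growing upward. Ignoring the aspect-ratio boundary scaling, the per-axis mapping is equivalent to the following illustrative sketch (the ClipSpace helper is not part of the patch):

```java
// Illustrative only: mapping a normalized landmark coordinate into OpenGL clip space.
final class ClipSpace {
  private ClipSpace() {}

  static float toClipX(float normalizedX) {
    return normalizedX * 2 - 1; // 0 (left) -> -1, 1 (right) -> +1
  }

  static float toClipY(float normalizedY) {
    return 1 - normalizedY * 2; // 0 (top) -> +1, 1 (bottom) -> -1: the vertical flip
  }
}
```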