diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..9c033288b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +global-exclude .git* +global-exclude *_test.py + +recursive-include mediapipe/models *.tflite *.txt +recursive-include mediapipe/modules *.tflite *.txt +recursive-include mediapipe/graphs *.binarypb diff --git a/README.md b/README.md index cef6213dd..10f1d4ee6 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Iris | Hands -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---: -[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) +Face Detection | Face Mesh | Iris πŸ†• | Hands | Pose πŸ†• +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :----: +[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](docs/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT :-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: @@ -33,20 +33,21 @@ Hair Segmentation -[]() | Android | iOS | Desktop | Web | Coral -:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | βœ… | βœ… | βœ… | βœ… | βœ… -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | 
βœ… | βœ… | βœ… | | -[Iris](https://google.github.io/mediapipe/solutions/iris) | βœ… | βœ… | βœ… | βœ… | -[Hands](https://google.github.io/mediapipe/solutions/hands) | βœ… | βœ… | βœ… | βœ… | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | βœ… | | βœ… | βœ… | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | βœ… | βœ… | βœ… | | βœ… -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | βœ… | βœ… | βœ… | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | βœ… | | | | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | βœ… | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | βœ… | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | βœ… | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | βœ… | | +[]() | Android | iOS | Desktop | Python | Web | Coral +:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: +[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | βœ… | βœ… | βœ… | | βœ… | βœ… +[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | βœ… | βœ… | βœ… | | | +[Iris](https://google.github.io/mediapipe/solutions/iris) πŸ†• | βœ… | βœ… | βœ… | | βœ… | +[Hands](https://google.github.io/mediapipe/solutions/hands) | βœ… | βœ… | βœ… | | βœ… | +[Pose](https://google.github.io/mediapipe/solutions/pose) πŸ†• | βœ… | βœ… | βœ… | βœ… | βœ… | +[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | βœ… | | βœ… | | βœ… | +[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | βœ… | βœ… | βœ… | | | βœ… +[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | βœ… | βœ… | βœ… | | | +[Objectron](https://google.github.io/mediapipe/solutions/objectron) | βœ… | | | | | +[KNIFT](https://google.github.io/mediapipe/solutions/knift) | βœ… | | | | | +[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | βœ… | | | +[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | βœ… | | | +[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | βœ… | | | ## MediaPipe on the Web @@ -68,6 +69,7 @@ never leaves your device. 
 * [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
 * [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking)
 * [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection)
+* [MediaPipe Pose](https://viz.mediapipe.dev/demo/pose_tracking)
 * [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation)
 
 ## Getting started
@@ -86,8 +88,10 @@ run code search using
 
 ## Publications
 
-* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation from a Single
-  Image](https://mediapipe.page.link/iris-blog) in Google AI Blog
+* [BlazePose - On-device Real-time Body Pose Tracking](https://mediapipe.page.link/blazepose-blog)
+  in Google AI Blog
+* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html)
+  in Google AI Blog
 * [MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html)
   in Google Developers Blog
 * [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html)
diff --git a/WORKSPACE b/WORKSPACE
index 8210d786e..a049c470c 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -137,6 +137,25 @@ http_archive(
     urls = ["https://github.com/google/multichannel-audio-tools/archive/master.zip"],
 )
 
+# 2020-07-09
+http_archive(
+    name = "pybind11_bazel",
+    strip_prefix = "pybind11_bazel-203508e14aab7309892a1c5f7dd05debda22d9a5",
+    urls = ["https://github.com/pybind/pybind11_bazel/archive/203508e14aab7309892a1c5f7dd05debda22d9a5.zip"],
+    sha256 = "75922da3a1bdb417d820398eb03d4e9bd067c4905a4246d35a44c01d62154d91",
+)
+
+http_archive(
+    name = "pybind11",
+    urls = [
+        "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.4.3.tar.gz",
+        "https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz",
+    ],
+    sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
+    strip_prefix = "pybind11-2.4.3",
+    build_file = "@pybind11_bazel//:pybind11.BUILD",
+)
+
 http_archive(
     name = "ceres_solver",
     url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip",
diff --git a/build_ios_examples.sh b/build_ios_examples.sh
index 15725acc9..93b97fc4e 100644
--- a/build_ios_examples.sh
+++ b/build_ios_examples.sh
@@ -58,6 +58,9 @@ apps="${app_dir}/*"
 for app in ${apps}; do
   if [[ -d "${app}" ]]; then
     target_name=${app##*/}
+    if [[ "${target_name}" == "common" ]]; then
+      continue
+    fi
     target="${app}:${target_name}"
 
     echo "=== Target: ${target}"
diff --git a/docs/getting_started/building_examples.md b/docs/getting_started/building_examples.md
index be50f9bc2..35da25bef 100644
--- a/docs/getting_started/building_examples.md
+++ b/docs/getting_started/building_examples.md
@@ -422,3 +422,73 @@ Note: This currently works only on Linux, and please first follow
 This will open up your webcam as long as it is connected and on. Any errors
 is likely due to your webcam being not accessible, or GPU drivers not setup
 properly.
+
+## Python
+
+### Prerequisite
+
+1. Make sure that Bazel and OpenCV are correctly installed and configured for
+   MediaPipe. Please see [Installation](./install.md) for how to set up Bazel
+   and OpenCV for MediaPipe on Linux and macOS.
+
+2. Install the following dependencies.
+ + ```bash + # Debian or Ubuntu + $ sudo apt install python3-dev + $ sudo apt install python3-venv + $ sudo apt install -y protobuf-compiler + ``` + + ```bash + # macOS + $ brew install protobuf + ``` + +### Set up Python virtual environment. + +1. Activate a Python virtual environment. + + ```bash + $ python3 -m venv mp_env && source mp_env/bin/activate + ``` + +2. In the virtual environment, go to the MediaPipe repo directory. + +3. Install the required Python packages. + + ```bash + (mp_env)mediapipe$ pip3 install -r requirements.txt + ``` + +4. Generate and install MediaPipe package. + + ```bash + (mp_env)mediapipe$ python3 setup.py gen_protos + (mp_env)mediapipe$ python3 setup.py install + ``` + +### Run in Python interpreter + +Make sure you are not in the MediaPipe repo directory. + +Using [MediaPipe Pose](../solutions/pose.md) as an example: + +```bash +(mp_env)$ python3 +>>> import mediapipe as mp +>>> pose_tracker = mp.examples.UpperBodyPoseTracker() + +# For image input +>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file') +>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file') + +# For live camera input +# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.) +>>> pose_tracker.run_live() + +# Close the tracker. +>>> pose_tracker.close() +``` + +Tip: Use command `deactivate` to exit the Python virtual environment. diff --git a/docs/getting_started/hello_world_ios.md b/docs/getting_started/hello_world_ios.md index 2fdb028ce..19de67d01 100644 --- a/docs/getting_started/hello_world_ios.md +++ b/docs/getting_started/hello_world_ios.md @@ -18,8 +18,8 @@ This codelab uses MediaPipe on an iOS device. ### What you will learn -How to develop an iOS application that uses MediaPipe and run a MediaPipe -graph on iOS. +How to develop an iOS application that uses MediaPipe and run a MediaPipe graph +on iOS. ### What you will build @@ -42,8 +42,8 @@ We will be using the following graph, [`edge_detection_mobile_gpu.pbtxt`]: ``` # MediaPipe graph that performs GPU Sobel edge detection on a live video stream. # Used in the examples -# mediapipe/examples/android/src/java/com/mediapipe/apps/edgedetectiongpu. -# mediapipe/examples/ios/edgedetectiongpu. +# mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:helloworld +# and mediapipe/examples/ios/helloworld. # Images coming into and out of the graph. input_stream: "input_video" @@ -89,21 +89,21 @@ to build it. First, create an XCode project via File > New > Single View App. -Set the product name to "EdgeDetectionGpu", and use an appropriate organization +Set the product name to "HelloWorld", and use an appropriate organization identifier, such as `com.google.mediapipe`. The organization identifier alongwith the product name will be the `bundle_id` for the application, such as -`com.google.mediapipe.EdgeDetectionGpu`. +`com.google.mediapipe.HelloWorld`. Set the language to Objective-C. Save the project to an appropriate location. Let's call this `$PROJECT_TEMPLATE_LOC`. So your project will be in the -`$PROJECT_TEMPLATE_LOC/EdgeDetectionGpu` directory. This directory will contain -another directory named `EdgeDetectionGpu` and an `EdgeDetectionGpu.xcodeproj` file. +`$PROJECT_TEMPLATE_LOC/HelloWorld` directory. This directory will contain +another directory named `HelloWorld` and an `HelloWorld.xcodeproj` file. 
-The `EdgeDetectionGpu.xcodeproj` will not be useful for this tutorial, as we will -use bazel to build the iOS application. The content of the -`$PROJECT_TEMPLATE_LOC/EdgeDetectionGpu/EdgeDetectionGpu` directory is listed below: +The `HelloWorld.xcodeproj` will not be useful for this tutorial, as we will use +bazel to build the iOS application. The content of the +`$PROJECT_TEMPLATE_LOC/HelloWorld/HelloWorld` directory is listed below: 1. `AppDelegate.h` and `AppDelegate.m` 2. `ViewController.h` and `ViewController.m` @@ -112,10 +112,10 @@ use bazel to build the iOS application. The content of the 5. `Main.storyboard` and `Launch.storyboard` 6. `Assets.xcassets` directory. -Copy these files to a directory named `EdgeDetectionGpu` to a location that can -access the MediaPipe source code. For example, the source code of the -application that we will build in this tutorial is located in -`mediapipe/examples/ios/EdgeDetectionGpu`. We will refer to this path as the +Copy these files to a directory named `HelloWorld` to a location that can access +the MediaPipe source code. For example, the source code of the application that +we will build in this tutorial is located in +`mediapipe/examples/ios/HelloWorld`. We will refer to this path as the `$APPLICATION_PATH` throughout the codelab. Note: MediaPipe provides Objective-C bindings for iOS. The edge detection @@ -134,8 +134,8 @@ load( ) ios_application( - name = "EdgeDetectionGpuApp", - bundle_id = "com.google.mediapipe.EdgeDetectionGpu", + name = "HelloWorldApp", + bundle_id = "com.google.mediapipe.HelloWorld", families = [ "iphone", "ipad", @@ -143,11 +143,11 @@ ios_application( infoplists = ["Info.plist"], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = "//mediapipe/examples/ios:developer_provisioning_profile", - deps = [":EdgeDetectionGpuAppLibrary"], + deps = [":HelloWorldAppLibrary"], ) objc_library( - name = "EdgeDetectionGpuAppLibrary", + name = "HelloWorldAppLibrary", srcs = [ "AppDelegate.m", "ViewController.m", @@ -172,9 +172,8 @@ The `objc_library` rule adds dependencies for the `AppDelegate` and `ViewController` classes, `main.m` and the application storyboards. The templated app depends only on the `UIKit` SDK. -The `ios_application` rule uses the `EdgeDetectionGpuAppLibrary` Objective-C -library generated to build an iOS application for installation on your iOS -device. +The `ios_application` rule uses the `HelloWorldAppLibrary` Objective-C library +generated to build an iOS application for installation on your iOS device. Note: You need to point to your own iOS developer provisioning profile to be able to run the application on your iOS device. @@ -182,21 +181,20 @@ able to run the application on your iOS device. 
To build the app, use the following command in a terminal: ``` -bazel build -c opt --config=ios_arm64 <$APPLICATION_PATH>:EdgeDetectionGpuApp' +bazel build -c opt --config=ios_arm64 <$APPLICATION_PATH>:HelloWorldApp' ``` -For example, to build the `EdgeDetectionGpuApp` application in -`mediapipe/examples/ios/edgedetectiongpu`, use the following -command: +For example, to build the `HelloWorldApp` application in +`mediapipe/examples/ios/helloworld`, use the following command: ``` -bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/edgedetectiongpu:EdgeDetectionGpuApp +bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/helloworld:HelloWorldApp ``` Then, go back to XCode, open Window > Devices and Simulators, select your device, and add the `.ipa` file generated by the command above to your device. -Here is the document on [setting up and compiling](./building_examples.md#ios) iOS -MediaPipe apps. +Here is the document on [setting up and compiling](./building_examples.md#ios) +iOS MediaPipe apps. Open the application on your device. Since it is empty, it should display a blank white screen. @@ -502,8 +500,8 @@ in our app: }]; ``` -Note: It is important to start the graph before starting the camera, so that -the graph is ready to process frames as soon as the camera starts sending them. +Note: It is important to start the graph before starting the camera, so that the +graph is ready to process frames as soon as the camera starts sending them. Earlier, when we received frames from the camera in the `processVideoFrame` function, we displayed them in the `_liveView` using the `_renderer`. Now, we @@ -552,9 +550,12 @@ results of running the edge detection graph on a live video feed. Congrats! ![edge_detection_ios_gpu_gif](../images/mobile/edge_detection_ios_gpu.gif) -If you ran into any issues, please see the full code of the tutorial -[here](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/edgedetectiongpu). +Please note that the iOS examples now use a [common] template app. The code in +this tutorial is used in the [common] template app. The [helloworld] app has the +appropriate `BUILD` file dependencies for the edge detection graph. [Bazel]:https://bazel.build/ -[`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt +[`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt [MediaPipe installation guide]:./install.md +[common]:(https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/common) +[helloworld]:(https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/helloworld) diff --git a/docs/getting_started/troubleshooting.md b/docs/getting_started/troubleshooting.md index 6d1a0e96e..76b4de3c8 100644 --- a/docs/getting_started/troubleshooting.md +++ b/docs/getting_started/troubleshooting.md @@ -27,13 +27,14 @@ Repository command failed usually indicates that Bazel fails to find the local Python binary. To solve this issue, please first find where the python binary is and then add -`--action_env PYTHON_BIN_PATH=` to the Bazel command like -the following: +`--action_env PYTHON_BIN_PATH=` to the Bazel command. 
For +example, you can switch to use the system default python3 binary by the +following command: ``` bazel build -c opt \ --define MEDIAPIPE_DISABLE_GPU=1 \ - --action_env PYTHON_BIN_PATH="/path/to/python" \ + --action_env PYTHON_BIN_PATH=$(which python3) \ mediapipe/examples/desktop/hello_world ``` diff --git a/docs/images/mobile/pose_tracking_android_gpu.gif b/docs/images/mobile/pose_tracking_android_gpu.gif new file mode 100644 index 000000000..deff2f02e Binary files /dev/null and b/docs/images/mobile/pose_tracking_android_gpu.gif differ diff --git a/docs/images/mobile/pose_tracking_android_gpu_small.gif b/docs/images/mobile/pose_tracking_android_gpu_small.gif new file mode 100644 index 000000000..9d3ec1522 Binary files /dev/null and b/docs/images/mobile/pose_tracking_android_gpu_small.gif differ diff --git a/docs/images/mobile/pose_tracking_detector_vitruvian_man.png b/docs/images/mobile/pose_tracking_detector_vitruvian_man.png new file mode 100644 index 000000000..ca25a5063 Binary files /dev/null and b/docs/images/mobile/pose_tracking_detector_vitruvian_man.png differ diff --git a/docs/images/mobile/pose_tracking_upper_body_example.gif b/docs/images/mobile/pose_tracking_upper_body_example.gif new file mode 100644 index 000000000..f9c0c5c6f Binary files /dev/null and b/docs/images/mobile/pose_tracking_upper_body_example.gif differ diff --git a/docs/images/mobile/pose_tracking_upper_body_landmarks.png b/docs/images/mobile/pose_tracking_upper_body_landmarks.png new file mode 100644 index 000000000..cb18ad567 Binary files /dev/null and b/docs/images/mobile/pose_tracking_upper_body_landmarks.png differ diff --git a/docs/index.md b/docs/index.md index bd27df416..84ad2a6a8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,9 +22,9 @@ desktop/cloud, web and IoT devices. 
## ML solutions in MediaPipe -Face Detection | Face Mesh | Iris | Hands -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---: -[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) +Face Detection | Face Mesh | Iris πŸ†• | Hands | Pose πŸ†• +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :----: +[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT :-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: @@ -33,20 +33,21 @@ Hair Segmentation -[]() | Android | iOS | Desktop | Web | Coral -:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | βœ… | βœ… | βœ… | βœ… | βœ… -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | βœ… | βœ… | βœ… | | -[Iris](https://google.github.io/mediapipe/solutions/iris) | βœ… | βœ… | βœ… | βœ… | -[Hands](https://google.github.io/mediapipe/solutions/hands) | βœ… | βœ… | βœ… | βœ… | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | βœ… | | βœ… | βœ… | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | βœ… | βœ… | βœ… | | βœ… -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | βœ… | βœ… | βœ… | | 
-[Objectron](https://google.github.io/mediapipe/solutions/objectron) | βœ… | | | | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | βœ… | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | βœ… | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | βœ… | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | βœ… | | +[]() | Android | iOS | Desktop | Python | Web | Coral +:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: +[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | βœ… | βœ… | βœ… | | βœ… | βœ… +[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | βœ… | βœ… | βœ… | | | +[Iris](https://google.github.io/mediapipe/solutions/iris) πŸ†• | βœ… | βœ… | βœ… | | βœ… | +[Hands](https://google.github.io/mediapipe/solutions/hands) | βœ… | βœ… | βœ… | | βœ… | +[Pose](https://google.github.io/mediapipe/solutions/pose) πŸ†• | βœ… | βœ… | βœ… | βœ… | βœ… | +[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | βœ… | | βœ… | | βœ… | +[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | βœ… | βœ… | βœ… | | | βœ… +[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | βœ… | βœ… | βœ… | | | +[Objectron](https://google.github.io/mediapipe/solutions/objectron) | βœ… | | | | | +[KNIFT](https://google.github.io/mediapipe/solutions/knift) | βœ… | | | | | +[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | βœ… | | | +[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | βœ… | | | +[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | βœ… | | | ## MediaPipe on the Web @@ -68,6 +69,7 @@ never leaves your device. 
 * [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
 * [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking)
 * [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection)
+* [MediaPipe Pose](https://viz.mediapipe.dev/demo/pose_tracking)
 * [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation)
 
 ## Getting started
@@ -86,8 +88,10 @@ run code search using
 
 ## Publications
 
-* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation from a Single
-  Image](https://mediapipe.page.link/iris-blog) in Google AI Blog
+* [BlazePose - On-device Real-time Body Pose Tracking](https://mediapipe.page.link/blazepose-blog)
+  in Google AI Blog
+* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html)
+  in Google AI Blog
 * [MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html)
   in Google Developers Blog
 * [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html)
diff --git a/docs/solutions/autoflip.md b/docs/solutions/autoflip.md
index faad99e92..48204d5b6 100644
--- a/docs/solutions/autoflip.md
+++ b/docs/solutions/autoflip.md
@@ -2,7 +2,7 @@
 layout: default
 title: AutoFlip (Saliency-aware Video Cropping)
 parent: Solutions
-nav_order: 10
+nav_order: 11
 ---
 
 # AutoFlip: Saliency-aware Video Cropping
diff --git a/docs/solutions/box_tracking.md b/docs/solutions/box_tracking.md
index 5c73a97fb..34fed0277 100644
--- a/docs/solutions/box_tracking.md
+++ b/docs/solutions/box_tracking.md
@@ -2,7 +2,7 @@
 layout: default
 title: Box Tracking
 parent: Solutions
-nav_order: 7
+nav_order: 8
 ---
 
 # MediaPipe Box Tracking
diff --git a/docs/solutions/face_detection.md b/docs/solutions/face_detection.md
index 4b9534b22..036624332 100644
--- a/docs/solutions/face_detection.md
+++ b/docs/solutions/face_detection.md
@@ -107,4 +107,4 @@ to cross-compile and run MediaPipe examples on the
   [TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/face-detector-quantized_edgetpu.tflite)
 * For back-facing camera:
   [TFLite model ](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_back.tflite)
-* [Model card](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
+* [Model card](https://mediapipe.page.link/blazeface-mc)
diff --git a/docs/solutions/face_mesh.md b/docs/solutions/face_mesh.md
index c678901a7..712ea5b0b 100644
--- a/docs/solutions/face_mesh.md
+++ b/docs/solutions/face_mesh.md
@@ -125,7 +125,7 @@ Tip: Maximum number of faces to detect/process is set to 1 by default. To
 change it, for Android modify `NUM_FACES` in
 [MainActivity.java](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java),
 and for iOS modify `kNumFaces` in
-[ViewController.mm](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu/ViewController.mm).
+[FaceMeshGpuViewController.mm](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.mm).
 
 ### Desktop
 
@@ -157,4 +157,4 @@ it, in the graph file modify the option of `ConstantSidePacketCalculator`.
* Face landmark model: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite), [TF.js model](https://tfhub.dev/mediapipe/facemesh/1) -* [Model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view) +* [Model card](https://mediapipe.page.link/facemesh-mc) diff --git a/docs/solutions/hair_segmentation.md b/docs/solutions/hair_segmentation.md index 0dec46951..0521ad60d 100644 --- a/docs/solutions/hair_segmentation.md +++ b/docs/solutions/hair_segmentation.md @@ -2,7 +2,7 @@ layout: default title: Hair Segmentation parent: Solutions -nav_order: 5 +nav_order: 6 --- # MediaPipe Hair Segmentation @@ -55,4 +55,4 @@ Please refer to [these instructions](../index.md#mediapipe-on-the-web). ([presentation](https://drive.google.com/file/d/1C8WYlWdDRNtU1_pYBvkkG5Z5wqYqf0yj/view)) ([supplementary video](https://drive.google.com/file/d/1LPtM99Ch2ogyXYbDNpEqnUfhFq0TfLuf/view)) * [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite) -* [Model card](https://drive.google.com/file/d/1lPwJ8BD_-3UUor4LayQ0xpa_RIC_hoRh/view) +* [Model card](https://mediapipe.page.link/hairsegmentation-mc) diff --git a/docs/solutions/iris.md b/docs/solutions/iris.md index d95b804ca..6aae833e3 100644 --- a/docs/solutions/iris.md +++ b/docs/solutions/iris.md @@ -102,7 +102,7 @@ camera with less than 10% error, without requiring any specialized hardware. This is done by relying on the fact that the horizontal iris diameter of the human eye remains roughly constant at 11.7Β±0.5 mm across a wide population, along with some simple geometric arguments. For more details please refer to our -[Google AI Blog post](https://mediapipe.page.link/iris-blog). +[Google AI Blog post](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html). ![iris_tracking_depth_from_iris.gif](../images/mobile/iris_tracking_depth_from_iris.gif) | :--------------------------------------------------------------------------------------------: | @@ -189,8 +189,8 @@ Please refer to [these instructions](../index.md#mediapipe-on-the-web). 
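The depth estimate referenced in the iris.md change above comes down to pinhole-camera similar triangles: the roughly constant physical iris diameter, its apparent size in pixels, and the camera's focal length in pixels determine the subject's distance. The sketch below only illustrates that arithmetic under assumed inputs; the function name, constant name, and numbers are illustrative and not part of the MediaPipe API.

```python
# Illustrative sketch of the depth-from-iris geometry (not MediaPipe API).
# Assumes the focal length is known in pixels and the iris diameter has been
# measured in pixels from the iris landmarks.

IRIS_DIAMETER_MM = 11.7  # Horizontal iris diameter, roughly constant across people.

def estimate_depth_mm(focal_length_px: float, iris_diameter_px: float) -> float:
    """Approximate subject-to-camera distance in millimeters."""
    # Similar triangles: depth / focal_length == real_diameter / pixel_diameter.
    return focal_length_px * IRIS_DIAMETER_MM / iris_diameter_px

# Example: a 1000 px focal length and a 23.4 px iris image give ~500 mm.
print(estimate_depth_mm(1000.0, 23.4))
```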
## Resources -* Google AI Blog: [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation - from a Single Image](https://mediapipe.page.link/iris-blog) +* Google AI Blog: + [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html) * Paper: [Real-time Pupil Tracking from Monocular Video for Digital Puppetry](https://arxiv.org/abs/2006.11341) ([presentation](https://youtu.be/cIhXkiiapQI)) diff --git a/docs/solutions/knift.md b/docs/solutions/knift.md index 942ad255f..82e84fd28 100644 --- a/docs/solutions/knift.md +++ b/docs/solutions/knift.md @@ -2,7 +2,7 @@ layout: default title: KNIFT (Template-based Feature Matching) parent: Solutions -nav_order: 9 +nav_order: 10 --- # MediaPipe KNIFT diff --git a/docs/solutions/media_sequence.md b/docs/solutions/media_sequence.md index dc3ef63bc..7085f090b 100644 --- a/docs/solutions/media_sequence.md +++ b/docs/solutions/media_sequence.md @@ -2,7 +2,7 @@ layout: default title: Dataset Preparation with MediaSequence parent: Solutions -nav_order: 11 +nav_order: 12 --- # Dataset Preparation with MediaSequence diff --git a/docs/solutions/object_detection.md b/docs/solutions/object_detection.md index 340e1990a..1cb353d0e 100644 --- a/docs/solutions/object_detection.md +++ b/docs/solutions/object_detection.md @@ -2,7 +2,7 @@ layout: default title: Object Detection parent: Solutions -nav_order: 6 +nav_order: 7 --- # MediaPipe Object Detection diff --git a/docs/solutions/objectron.md b/docs/solutions/objectron.md index 0239f174c..5029917dd 100644 --- a/docs/solutions/objectron.md +++ b/docs/solutions/objectron.md @@ -2,7 +2,7 @@ layout: default title: Objectron (3D Object Detection) parent: Solutions -nav_order: 8 +nav_order: 9 --- # MediaPipe Objectron diff --git a/docs/solutions/pose.md b/docs/solutions/pose.md new file mode 100644 index 000000000..92a8be981 --- /dev/null +++ b/docs/solutions/pose.md @@ -0,0 +1,179 @@ +--- +layout: default +title: Pose +parent: Solutions +nav_order: 5 +--- + +# MediaPipe Pose +{: .no_toc } + +1. TOC +{:toc} +--- + +## Overview + +Human pose estimation from video plays a critical role in various applications +such as quantifying physical exercises, sign language recognition, and full-body +gesture control. For example, it can form the basis for yoga, dance, and fitness +applications. It can also enable the overlay of digital content and information +on top of the physical world in augmented reality. + +MediaPipe Pose is a ML solution for high-fidelity upper-body pose tracking, +inferring 25 2D upper-body landmarks from RGB video frames utilizing our +[BlazePose](https://mediapipe.page.link/blazepose-blog) research. Current +state-of-the-art approaches rely primarily on powerful desktop environments for +inference, whereas our method achieves real-time performance on most modern +[mobile phones](#mobile), [desktops/laptops](#desktop), in [python](#python) and +even on the [web](#web). A variant of MediaPipe Pose that performs full-body +pose tracking on mobile phones will be included in an upcoming release of +[ML Kit](https://developers.google.com/ml-kit/early-access/pose-detection). + +![pose_tracking_upper_body_example.gif](../images/mobile/pose_tracking_upper_body_example.gif) | +:--------------------------------------------------------------------------------------------: | +*Fig 1. 
Example of MediaPipe Pose for upper-body pose tracking.* | + +## ML Pipeline + +The solution utilizes a two-step detector-tracker ML pipeline, proven to be +effective in our [MediaPipe Hands](./hands.md) and +[MediaPipe Face Mesh](./face_mesh.md) solutions. Using a detector, the pipeline +first locates the pose region-of-interest (ROI) within the frame. The tracker +subsequently predicts the pose landmarks within the ROI using the ROI-cropped +frame as input. Note that for video use cases the detector is invoked only as +needed, i.e., for the very first frame and when the tracker could no longer +identify body pose presence in the previous frame. For other frames the pipeline +simply derives the ROI from the previous frame’s pose landmarks. + +The pipeline is implemented as a MediaPipe +[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt) +that uses a +[pose landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt) +from the +[pose landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark) +and renders using a dedicated +[upper-body pose renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_gpu.pbtxt). +The +[pose landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt) +internally uses a +[pose detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt) +from the +[pose detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection). + +Note: To visualize a graph, copy the graph and paste it into +[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how +to visualize its associated subgraphs, please see +[visualizer documentation](../tools/visualizer.md). + +## Models + +### Pose Detection Model (BlazePose Detector) + +The detector is inspired by our own lightweight +[BlazeFace](https://arxiv.org/abs/1907.05047) model, used in +[MediaPipe Face Detection](./face_detection.md), as a proxy for a person +detector. It explicitly predicts two additional virtual keypoints that firmly +describe the human body center, rotation and scale as a circle. Inspired by +[Leonardo’s Vitruvian man](https://en.wikipedia.org/wiki/Vitruvian_Man), we +predict the midpoint of a person's hips, the radius of a circle circumscribing +the whole person, and the incline angle of the line connecting the shoulder and +hip midpoints. + +![pose_tracking_detector_vitruvian_man.png](../images/mobile/pose_tracking_detector_vitruvian_man.png) | +:----------------------------------------------------------------------------------------------------: | +*Fig 2. Vitruvian man aligned via two virtual keypoints predicted by BlazePose detector in addition to the face bounding box.* | + +### Pose Landmark Model (BlazePose Tracker) + +The landmark model currently included in MediaPipe Pose predicts the location of +25 upper-body landmarks (see figure below), with three degrees of freedom each +(x, y location and visibility), plus two virtual alignment keypoints. 
It shares +the same architecture as the full-body version that predicts 33 landmarks, +described in more detail in the +[BlazePose Google AI Blog](https://mediapipe.page.link/blazepose-blog) and in +this [paper](https://arxiv.org/abs/2006.10204). + +![pose_tracking_upper_body_landmarks.png](../images/mobile/pose_tracking_upper_body_landmarks.png) | +:------------------------------------------------------------------------------------------------: | +*Fig 3. 25 upper-body pose landmarks.* | + +## Example Apps + +Please first see general instructions for +[Android](../getting_started/building_examples.md#android), +[iOS](../getting_started/building_examples.md#ios), +[desktop](../getting_started/building_examples.md#desktop) and +[Python](../getting_started/building_examples.md#python) on how to build +MediaPipe examples. + +Note: To visualize a graph, copy the graph and paste it into +[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how +to visualize its associated subgraphs, please see +[visualizer documentation](../tools/visualizer.md). + +### Mobile + +* Graph: + [`mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt) +* Android target: + [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1uKc6T7KSuA0Mlq2URi5YookHu0U3yoh_/view?usp=sharing) + [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu:upperbodyposetrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD) +* iOS target: + [`mediapipe/examples/ios/upperbodyposetrackinggpu:UpperBodyPoseTrackingGpuApp`](http:/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD) + +### Desktop + +Please first see general instructions for +[desktop](../getting_started/building_examples.md#desktop) on how to build +MediaPipe examples. + +* Running on CPU + * Graph: + [`mediapipe/graphs/pose_tracking/upper_body_pose_tracking_cpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_cpu.pbtxt) + * Target: + [`mediapipe/examples/desktop/upper_body_pose_tracking:upper_body_pose_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD) +* Running on GPU + * Graph: + [`mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt) + * Target: + [`mediapipe/examples/desktop/upper_body_pose_tracking:upper_body_pose_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD) + +### Python + +Please first see general instructions for +[Python](../getting_started/building_examples.md#python) examples. + +```bash +(mp_env)$ python3 +>>> import mediapipe as mp +>>> pose_tracker = mp.examples.UpperBodyPoseTracker() + +# For image input +>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file') +>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file') + +# For live camera input +# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.) +>>> pose_tracker.run_live() + +# Close the tracker. 
+>>> pose_tracker.close() +``` + +### Web + +Please refer to [these instructions](../index.md#mediapipe-on-the-web). + +## Resources + +* Google AI Blog: + [BlazePose - On-device Real-time Body Pose Tracking](https://mediapipe.page.link/blazepose-blog) +* Paper: + [BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204) + ([presentation](https://youtu.be/YPpUOTRn5tA)) +* Pose detection model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection.tflite) +* Upper-body pose landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite) +* [Model card](https://mediapipe.page.link/blazepose-mc) diff --git a/docs/solutions/solutions.md b/docs/solutions/solutions.md index 840b5ce3d..e1822e4c0 100644 --- a/docs/solutions/solutions.md +++ b/docs/solutions/solutions.md @@ -16,17 +16,18 @@ has_toc: false -[]() | Android | iOS | Desktop | Web | Coral -:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | βœ… | βœ… | βœ… | βœ… | βœ… -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | βœ… | βœ… | βœ… | | -[Iris](https://google.github.io/mediapipe/solutions/iris) | βœ… | βœ… | βœ… | βœ… | -[Hands](https://google.github.io/mediapipe/solutions/hands) | βœ… | βœ… | βœ… | βœ… | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | βœ… | | βœ… | βœ… | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | βœ… | βœ… | βœ… | | βœ… -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | βœ… | βœ… | βœ… | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | βœ… | | | | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | βœ… | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | βœ… | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | βœ… | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | βœ… | | +[]() | Android | iOS | Desktop | Python | Web | Coral +:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---: +[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | βœ… | βœ… | βœ… | | βœ… | βœ… +[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | βœ… | βœ… | βœ… | | | +[Iris](https://google.github.io/mediapipe/solutions/iris) πŸ†• | βœ… | βœ… | βœ… | | βœ… | +[Hands](https://google.github.io/mediapipe/solutions/hands) | βœ… | βœ… | βœ… | | βœ… | +[Pose](https://google.github.io/mediapipe/solutions/pose) πŸ†• | βœ… | βœ… | βœ… | βœ… | βœ… | +[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | βœ… | | βœ… | | βœ… | +[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | βœ… | βœ… | βœ… | | | βœ… +[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | βœ… | βœ… | βœ… | | | +[Objectron](https://google.github.io/mediapipe/solutions/objectron) | βœ… | | | | | +[KNIFT](https://google.github.io/mediapipe/solutions/knift) | βœ… | | | | | +[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | βœ… | | | +[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | 
βœ… | | | +[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | βœ… | | | diff --git a/docs/solutions/youtube_8m.md b/docs/solutions/youtube_8m.md index ebb51dcc4..1c1637bff 100644 --- a/docs/solutions/youtube_8m.md +++ b/docs/solutions/youtube_8m.md @@ -2,7 +2,7 @@ layout: default title: YouTube-8M Feature Extraction and Model Inference parent: Solutions -nav_order: 12 +nav_order: 13 --- # YouTube-8M Feature Extraction and Model Inference diff --git a/docs/tools/tracing_and_profiling.md b/docs/tools/tracing_and_profiling.md index a0188836b..2c05abfe4 100644 --- a/docs/tools/tracing_and_profiling.md +++ b/docs/tools/tracing_and_profiling.md @@ -294,7 +294,7 @@ trace_log_margin_usec in trace log output. This margin allows time for events to be appended to the TraceBuffer. -trace_log_duration_events +trace_log_instant_events : False specifies an event for each calculator invocation. True specifies a separate event for each start and finish time. diff --git a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen index b8e8f95bf..7d501c803 100644 --- a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen +++ b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen @@ -3,8 +3,11 @@ "/BUILD", "mediapipe/BUILD", "mediapipe/objc/BUILD", - "mediapipe/examples/ios/BUILD", - "mediapipe/examples/ios/edgedetectiongpu/BUILD", + "mediapipe/framework/BUILD", + "mediapipe/gpu/BUILD", + "mediapipe/objc/testing/app/BUILD", + "mediapipe/examples/ios/common/BUILD", + "mediapipe/examples/ios/helloworld/BUILD", "mediapipe/examples/ios/facedetectioncpu/BUILD", "mediapipe/examples/ios/facedetectiongpu/BUILD", "mediapipe/examples/ios/facemeshgpu/BUILD", @@ -13,10 +16,11 @@ "mediapipe/examples/ios/iristrackinggpu/BUILD", "mediapipe/examples/ios/multihandtrackinggpu/BUILD", "mediapipe/examples/ios/objectdetectioncpu/BUILD", - "mediapipe/examples/ios/objectdetectiongpu/BUILD" + "mediapipe/examples/ios/objectdetectiongpu/BUILD", + "mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD" ], "buildTargets" : [ - "//mediapipe/examples/ios/edgedetectiongpu:EdgeDetectionGpuApp", + "//mediapipe/examples/ios/helloworld:HelloWorldApp", "//mediapipe/examples/ios/facedetectioncpu:FaceDetectionCpuApp", "//mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp", "//mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp", @@ -26,6 +30,7 @@ "//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp", "//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp", "//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp", + "//mediapipe/examples/ios/upperbodyposetrackinggpu:UpperBodyPoseTrackingGpuApp", "//mediapipe/objc:mediapipe_framework_ios" ], "optionSet" : { @@ -80,24 +85,18 @@ "mediapipe/calculators/util", "mediapipe/examples", "mediapipe/examples/ios", - "mediapipe/examples/ios/edgedetectiongpu", - "mediapipe/examples/ios/edgedetectiongpu/Base.lproj", + "mediapipe/examples/ios/common", + "mediapipe/examples/ios/common/Base.lproj", + "mediapipe/examples/ios/helloworld", "mediapipe/examples/ios/facedetectioncpu", - "mediapipe/examples/ios/facedetectioncpu/Base.lproj", "mediapipe/examples/ios/facedetectiongpu", - "mediapipe/examples/ios/facedetectiongpu/Base.lproj", "mediapipe/examples/ios/handdetectiongpu", - "mediapipe/examples/ios/handdetectiongpu/Base.lproj", "mediapipe/examples/ios/handtrackinggpu", - "mediapipe/examples/ios/handtrackinggpu/Base.lproj", "mediapipe/examples/ios/iristrackinggpu", - 
"mediapipe/examples/ios/iristrackinggpu/Base.lproj", "mediapipe/examples/ios/multihandtrackinggpu", - "mediapipe/examples/ios/multihandtrackinggpu/Base.lproj", "mediapipe/examples/ios/objectdetectioncpu", - "mediapipe/examples/ios/objectdetectioncpu/Base.lproj", "mediapipe/examples/ios/objectdetectiongpu", - "mediapipe/examples/ios/objectdetectiongpu/Base.lproj", + "mediapipe/examples/ios/upperbodyposetrackinggpu", "mediapipe/framework", "mediapipe/framework/deps", "mediapipe/framework/formats", @@ -113,6 +112,7 @@ "mediapipe/graphs/face_detection", "mediapipe/graphs/hand_tracking", "mediapipe/graphs/object_detection", + "mediapipe/graphs/pose_tracking", "mediapipe/models", "mediapipe/modules", "mediapipe/objc", diff --git a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf index 9f1ab5d66..432316521 100644 --- a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf +++ b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf @@ -11,7 +11,6 @@ "mediapipe", "mediapipe/objc", "mediapipe/examples/ios", - "mediapipe/examples/ios/edgedetectiongpu", "mediapipe/examples/ios/facedetectioncpu", "mediapipe/examples/ios/facedetectiongpu", "mediapipe/examples/ios/facemeshgpu", @@ -20,7 +19,8 @@ "mediapipe/examples/ios/iristrackinggpu", "mediapipe/examples/ios/multihandtrackinggpu", "mediapipe/examples/ios/objectdetectioncpu", - "mediapipe/examples/ios/objectdetectiongpu" + "mediapipe/examples/ios/objectdetectiongpu", + "mediapipe/examples/ios/upperbodyposetrackinggpu" ], "projectName" : "Mediapipe", "workspaceRoot" : "../.." diff --git a/mediapipe/__init__.py b/mediapipe/__init__.py index 6db73bc52..c0a275823 100644 --- a/mediapipe/__init__.py +++ b/mediapipe/__init__.py @@ -1,4 +1,4 @@ -"""Copyright 2019 The MediaPipe Authors. +"""Copyright 2019 - 2020 The MediaPipe Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,3 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" + +import mediapipe.examples.python as examples +from mediapipe.python import * +import mediapipe.util as util diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index a0b22054f..1dddd7527 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -606,6 +606,35 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "packet_presence_calculator", + srcs = ["packet_presence_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "packet_presence_calculator_test", + srcs = ["packet_presence_calculator_test.cc"], + deps = [ + ":gate_calculator", + ":packet_presence_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "//mediapipe/framework/tool:sink", + ], +) + cc_library( name = "previous_loopback_calculator", srcs = ["previous_loopback_calculator.cc"], diff --git a/mediapipe/calculators/core/packet_presence_calculator.cc b/mediapipe/calculators/core/packet_presence_calculator.cc new file mode 100644 index 000000000..468d31718 --- /dev/null +++ b/mediapipe/calculators/core/packet_presence_calculator.cc @@ -0,0 +1,84 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// For each non empty input packet, emits a single output packet containing a +// boolean value "true", "false" in response to empty packets (a.k.a. timestamp +// bound updates) This can be used to "flag" the presence of an arbitrary packet +// type as input into a downstream calculator. +// +// Inputs: +// PACKET - any type. +// +// Outputs: +// PRESENCE - bool. +// "true" if packet is not empty, "false" if there's timestamp bound update +// instead. +// +// Examples: +// node: { +// calculator: "PacketPresenceCalculator" +// input_stream: "PACKET:packet" +// output_stream: "PRESENCE:presence" +// } +// +// This calculator can be used in conjuction with GateCalculator in order to +// allow/disallow processing. 
+// node: {
+//   calculator: "PacketPresenceCalculator"
+//   input_stream: "PACKET:value"
+//   output_stream: "PRESENCE:disallow_if_present"
+// }
+// node {
+//   calculator: "GateCalculator"
+//   input_stream: "image"
+//   input_stream: "DISALLOW:disallow_if_present"
+//   output_stream: "image_for_processing"
+//   options: {
+//     [mediapipe.GateCalculatorOptions.ext] {
+//       empty_packets_as_allow: true
+//     }
+//   }
+// }
+class PacketPresenceCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->Inputs().Tag("PACKET").SetAny();
+    cc->Outputs().Tag("PRESENCE").Set<bool>();
+    // Process() function is invoked in response to input stream timestamp
+    // bound updates.
+    cc->SetProcessTimestampBounds(true);
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) final {
+    cc->Outputs()
+        .Tag("PRESENCE")
+        .AddPacket(MakePacket<bool>(!cc->Inputs().Tag("PACKET").IsEmpty())
+                       .At(cc->InputTimestamp()));
+    return ::mediapipe::OkStatus();
+  }
+};
+REGISTER_CALCULATOR(PacketPresenceCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/core/packet_presence_calculator_test.cc b/mediapipe/calculators/core/packet_presence_calculator_test.cc
new file mode 100644
index 000000000..b1b8a8e85
--- /dev/null
+++ b/mediapipe/calculators/core/packet_presence_calculator_test.cc
@@ -0,0 +1,85 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_runner.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/status_matchers.h"
+#include "mediapipe/framework/timestamp.h"
+#include "mediapipe/framework/tool/sink.h"
+
+namespace mediapipe {
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Value;
+namespace {
+
+MATCHER_P2(BoolPacket, value, timestamp, "") {
+  return Value(arg.template Get<bool>(), Eq(value)) &&
+         Value(arg.Timestamp(), Eq(timestamp));
+}
+
+TEST(PacketPresenceCalculator, CorrectTimestamps) {
+  std::vector<Packet> output_packets;
+  CalculatorGraphConfig graph_config =
+      ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+        input_stream: 'allow'
+        input_stream: 'value'
+        node {
+          calculator: "GateCalculator"
+          input_stream: 'value'
+          input_stream: 'ALLOW:allow'
+          output_stream: 'gated_value'
+        }
+        node {
+          calculator: 'PacketPresenceCalculator'
+          input_stream: 'PACKET:gated_value'
+          output_stream: 'PRESENCE:presence'
+        }
+      )");
+  tool::AddVectorSink("presence", &graph_config, &output_packets);
+
+  CalculatorGraph graph;
+  MP_ASSERT_OK(graph.Initialize(graph_config, {}));
+  MP_ASSERT_OK(graph.StartRun({}));
+
+  auto send_packet = [&graph](int value, bool allow, Timestamp timestamp) {
+    MP_ASSERT_OK(graph.AddPacketToInputStream(
+        "value", MakePacket<int>(value).At(timestamp)));
+    MP_ASSERT_OK(graph.AddPacketToInputStream(
+        "allow", MakePacket<bool>(allow).At(timestamp)));
+  };
+
+  send_packet(10, false, Timestamp(10));
+  MP_EXPECT_OK(graph.WaitUntilIdle());
+  EXPECT_THAT(output_packets, ElementsAre(BoolPacket(false, Timestamp(10))));
+
+  output_packets.clear();
+  send_packet(20, true, Timestamp(11));
+  MP_EXPECT_OK(graph.WaitUntilIdle());
+  EXPECT_THAT(output_packets, ElementsAre(BoolPacket(true, Timestamp(11))));
+
+  MP_EXPECT_OK(graph.CloseAllInputStreams());
+  MP_EXPECT_OK(graph.WaitUntilDone());
+}
+
+}  // namespace
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc
index 8ed8a7ae8..a48cb2796 100644
--- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc
+++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc
@@ -201,7 +201,7 @@ int GetXnnpackNumThreads(
 // Input tensors are assumed to be of the correct size and already normalized.
 // All output TfLiteTensors will be destroyed when the graph closes,
 // (i.e. after calling graph.WaitUntilDone()).
-// GPU tensors are currently only supported on Android and iOS.
+// GPU tensor support requires OpenGL ES 3.1+.
 // This calculator uses FixedSizeInputStreamHandler by default.
// class TfLiteInferenceCalculator : public CalculatorBase { diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index 376b608b0..c4f0f8283 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -20,6 +20,24 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE"]) +cc_library( + name = "alignment_points_to_rects_calculator", + srcs = ["alignment_points_to_rects_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + proto_library( name = "annotation_overlay_calculator_proto", srcs = ["annotation_overlay_calculator.proto"], @@ -586,6 +604,15 @@ proto_library( ], ) +proto_library( + name = "rect_to_render_scale_calculator_proto", + srcs = ["rect_to_render_scale_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + ], +) + proto_library( name = "detections_to_render_data_calculator_proto", srcs = ["detections_to_render_data_calculator.proto"], @@ -700,7 +727,15 @@ mediapipe_cc_proto_library( deps = [":rect_to_render_data_calculator_proto"], ) -# TODO: What is that one for? +mediapipe_cc_proto_library( + name = "rect_to_render_scale_calculator_cc_proto", + srcs = ["rect_to_render_scale_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":rect_to_render_scale_calculator_proto"], +) mediapipe_cc_proto_library( name = "detections_to_render_data_calculator_cc_proto", @@ -830,6 +865,19 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "rect_to_render_scale_calculator", + srcs = ["rect_to_render_scale_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":rect_to_render_scale_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + ], + alwayslink = 1, +) + cc_test( name = "detections_to_render_data_calculator_test", size = "small", diff --git a/mediapipe/calculators/util/alignment_points_to_rects_calculator.cc b/mediapipe/calculators/util/alignment_points_to_rects_calculator.cc new file mode 100644 index 000000000..49768eae7 --- /dev/null +++ b/mediapipe/calculators/util/alignment_points_to_rects_calculator.cc @@ -0,0 +1,102 @@ +#include + +#include "mediapipe/calculators/util/detections_to_rects_calculator.h" +#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +namespace {} // namespace + +// A calculator that converts Detection with two alignment points to Rect. 
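+// It reuses DetectionsToRectsCalculator, but derives the square rect size from
+// the distance between the two keypoints rather than from the detection's
+// bounding box.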
+//
+// Detection should contain two points:
+//   * Center point - center of the crop.
+//   * Scale point - vector from center to scale point defines size and
+//     rotation of the Rect. Note that the Y coordinate of this vector is
+//     flipped before computing the rotation (because the Y axis is directed
+//     downwards), so define the target rotation vector accordingly.
+//
+// Example config:
+//   node {
+//     calculator: "AlignmentPointsRectsCalculator"
+//     input_stream: "DETECTIONS:detections"
+//     input_stream: "IMAGE_SIZE:image_size"
+//     output_stream: "NORM_RECT:rect"
+//     options: {
+//       [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
+//         rotation_vector_start_keypoint_index: 0
+//         rotation_vector_end_keypoint_index: 1
+//         rotation_vector_target_angle_degrees: 90
+//         output_zero_rect_for_empty_detections: true
+//       }
+//     }
+//   }
+class AlignmentPointsRectsCalculator : public DetectionsToRectsCalculator {
+ public:
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    RET_CHECK_OK(DetectionsToRectsCalculator::Open(cc));
+
+    // Make sure that start and end keypoints are provided.
+    // They are required for the rect size calculation and will also force the
+    // base calculator to compute rotation.
+    options_ = cc->Options<DetectionsToRectsCalculatorOptions>();
+    RET_CHECK(options_.has_rotation_vector_start_keypoint_index())
+        << "Start keypoint is required to calculate rect size and rotation";
+    RET_CHECK(options_.has_rotation_vector_end_keypoint_index())
+        << "End keypoint is required to calculate rect size and rotation";
+
+    return ::mediapipe::OkStatus();
+  }
+
+ private:
+  ::mediapipe::Status DetectionToNormalizedRect(
+      const ::mediapipe::Detection& detection,
+      const DetectionSpec& detection_spec,
+      ::mediapipe::NormalizedRect* rect) override;
+};
+REGISTER_CALCULATOR(AlignmentPointsRectsCalculator);
+
+::mediapipe::Status AlignmentPointsRectsCalculator::DetectionToNormalizedRect(
+    const Detection& detection, const DetectionSpec& detection_spec,
+    NormalizedRect* rect) {
+  const auto& location_data = detection.location_data();
+  const auto& image_size = detection_spec.image_size;
+  RET_CHECK(image_size) << "Image size is required to calculate the rect";
+
+  const float x_center =
+      location_data.relative_keypoints(start_keypoint_index_).x() *
+      image_size->first;
+  const float y_center =
+      location_data.relative_keypoints(start_keypoint_index_).y() *
+      image_size->second;
+
+  const float x_scale =
+      location_data.relative_keypoints(end_keypoint_index_).x() *
+      image_size->first;
+  const float y_scale =
+      location_data.relative_keypoints(end_keypoint_index_).y() *
+      image_size->second;
+
+  // Bounding box size as double distance from center to scale point.
+  const float box_size =
+      std::sqrt((x_scale - x_center) * (x_scale - x_center) +
+                (y_scale - y_center) * (y_scale - y_center)) *
+      2.0;
+
+  // Set resulting bounding box.
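+  // For example (hypothetical numbers): with image_size = (640, 480), a center
+  // keypoint at (0.5, 0.5) maps to (320, 240) px and a scale keypoint at
+  // (0.5, 0.25) maps to (320, 120) px; the distance is 120 px, so
+  // box_size = 240 px and the rect below becomes 240/640 wide and 240/480
+  // high, centered at (0.5, 0.5).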
+  rect->set_x_center(x_center / image_size->first);
+  rect->set_y_center(y_center / image_size->second);
+  rect->set_width(box_size / image_size->first);
+  rect->set_height(box_size / image_size->second);
+
+  return ::mediapipe::OkStatus();
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/util/rect_to_render_scale_calculator.cc b/mediapipe/calculators/util/rect_to_render_scale_calculator.cc
new file mode 100644
index 000000000..d55063aa4
--- /dev/null
+++ b/mediapipe/calculators/util/rect_to_render_scale_calculator.cc
@@ -0,0 +1,111 @@
+#include "mediapipe/calculators/util/rect_to_render_scale_calculator.pb.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+
+namespace mediapipe {
+
+namespace {
+
+constexpr char kNormRectTag[] = "NORM_RECT";
+constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr char kRenderScaleTag[] = "RENDER_SCALE";
+
+}  // namespace
+
+// A calculator to get scale for RenderData primitives.
+//
+// This calculator allows the size of RenderData primitives (configured via
+// `thickness`) to depend on the actual size of the object they should
+// highlight (e.g. pose, hand or face): bigger rendered primitives for
+// bigger/closer objects and smaller primitives for smaller/farther objects.
+//
+// IMPORTANT NOTE: RenderData primitives are rendered via OpenCV, which accepts
+// only integer thickness. So when an object moves closer or farther away,
+// you'll see 1-pixel jumps.
+//
+// Check `mediapipe/util/render_data.proto` for details on
+// RenderData primitives and the `thickness` parameter.
+//
+// Inputs:
+//   NORM_RECT: Normalized rectangle to compute the object size from, taken as
+//     the maximum of width and height.
+//   IMAGE_SIZE: A std::pair<int, int> representation of image width and height
+//     to transform normalized object width and height to absolute pixel
+//     coordinates.
+//
+// Outputs:
+//   RENDER_SCALE: Float value that should be used to scale RenderData
+//     primitives, calculated as `rect_size * multiplier`.
+//
+// Example config:
+//   node {
+//     calculator: "RectToRenderScaleCalculator"
+//     input_stream: "NORM_RECT:pose_landmarks_rect"
+//     input_stream: "IMAGE_SIZE:image_size"
+//     output_stream: "RENDER_SCALE:render_scale"
+//     options: {
+//       [mediapipe.RectToRenderScaleCalculatorOptions.ext] {
+//         multiplier: 0.001
+//       }
+//     }
+//   }
+class RectToRenderScaleCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc);
+  ::mediapipe::Status Open(CalculatorContext* cc) override;
+  ::mediapipe::Status Process(CalculatorContext* cc) override;
+
+ private:
+  RectToRenderScaleCalculatorOptions options_;
+};
+REGISTER_CALCULATOR(RectToRenderScaleCalculator);
+
+::mediapipe::Status RectToRenderScaleCalculator::GetContract(
+    CalculatorContract* cc) {
+  cc->Inputs().Tag(kNormRectTag).Set<NormalizedRect>();
+  cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
+  cc->Outputs().Tag(kRenderScaleTag).Set<float>();
+
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status RectToRenderScaleCalculator::Open(CalculatorContext* cc) {
+  cc->SetOffset(TimestampDiff(0));
+  options_ = cc->Options<RectToRenderScaleCalculatorOptions>();
+
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status RectToRenderScaleCalculator::Process(
+    CalculatorContext* cc) {
+  if (cc->Inputs().Tag(kNormRectTag).IsEmpty()) {
+    cc->Outputs()
+        .Tag(kRenderScaleTag)
+        .AddPacket(MakePacket<float>(options_.multiplier())
+                       .At(cc->InputTimestamp()));
+    return ::mediapipe::OkStatus();
+  }
+
+  // Get image size.
+  int image_width;
+  int image_height;
+  std::tie(image_width, image_height) =
+      cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
+
+  // Get rect size in absolute pixel coordinates.
+  const auto& rect = cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>();
+  const float rect_width = rect.width() * image_width;
+  const float rect_height = rect.height() * image_height;
+
+  // Calculate render scale.
+  const float rect_size = std::max(rect_width, rect_height);
+  const float render_scale = rect_size * options_.multiplier();
+
+  cc->Outputs()
+      .Tag(kRenderScaleTag)
+      .AddPacket(MakePacket<float>(render_scale).At(cc->InputTimestamp()));
+
+  return ::mediapipe::OkStatus();
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/util/rect_to_render_scale_calculator.proto b/mediapipe/calculators/util/rect_to_render_scale_calculator.proto
new file mode 100644
index 000000000..ef80cf3cf
--- /dev/null
+++ b/mediapipe/calculators/util/rect_to_render_scale_calculator.proto
@@ -0,0 +1,18 @@
+syntax = "proto2";
+
+package mediapipe;
+
+import "mediapipe/framework/calculator.proto";
+
+message RectToRenderScaleCalculatorOptions {
+  extend CalculatorOptions {
+    optional RectToRenderScaleCalculatorOptions ext = 299463409;
+  }
+
+  // Multiplier to apply to the rect size.
+  // If `thickness` for the RenderData primitives of an object (e.g. pose,
+  // hand or face) was tuned for an object of size `A`, the multiplier should
+  // be `1/A`. Then, when the actual object size on the image is `B`, all
+  // RenderData primitives will be scaled by a factor of `B/A`.
+  optional float multiplier = 1 [default = 0.01];
+}
diff --git a/mediapipe/calculators/video/tracked_detection_manager_calculator.cc b/mediapipe/calculators/video/tracked_detection_manager_calculator.cc
index 206711f43..7e6ba6749 100644
--- a/mediapipe/calculators/video/tracked_detection_manager_calculator.cc
+++ b/mediapipe/calculators/video/tracked_detection_manager_calculator.cc
@@ -197,90 +197,88 @@ REGISTER_CALCULATOR(TrackedDetectionManagerCalculator);
 
 ::mediapipe::Status TrackedDetectionManagerCalculator::Process(
     CalculatorContext* cc) {
-  if (cc->Inputs().HasTag("TRACKING_BOXES")) {
-    if (!cc->Inputs().Tag("TRACKING_BOXES").IsEmpty()) {
-      const TimedBoxProtoList& tracked_boxes =
-          cc->Inputs().Tag("TRACKING_BOXES").Get<TimedBoxProtoList>();
+  if (cc->Inputs().HasTag(kTrackingBoxesTag) &&
+      !cc->Inputs().Tag(kTrackingBoxesTag).IsEmpty()) {
+    const TimedBoxProtoList& tracked_boxes =
+        cc->Inputs().Tag(kTrackingBoxesTag).Get<TimedBoxProtoList>();
 
-      // Collect all detections that are removed.
-      auto removed_detection_ids = absl::make_unique<std::vector<int>>();
-      for (const TimedBoxProto& tracked_box : tracked_boxes.box()) {
-        NormalizedRect bounding_box;
-        bounding_box.set_x_center((tracked_box.left() + tracked_box.right()) /
-                                  2.f);
-        bounding_box.set_y_center((tracked_box.bottom() + tracked_box.top()) /
-                                  2.f);
-        bounding_box.set_height(tracked_box.bottom() - tracked_box.top());
-        bounding_box.set_width(tracked_box.right() - tracked_box.left());
-        bounding_box.set_rotation(tracked_box.rotation());
-        // First check if this box updates a detection that's waiting for
-        // update from the tracker.
-        auto waiting_for_update_detectoin_ptr =
-            waiting_for_update_detections_.find(tracked_box.id());
-        if (waiting_for_update_detectoin_ptr !=
-            waiting_for_update_detections_.end()) {
-          // Add the detection and remove duplicated detections.
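To make the `multiplier` option defined above concrete, here is a minimal sketch (not part of the patch; the numbers are hypothetical) of the value RectToRenderScaleCalculator would emit when `thickness` was tuned on a roughly 200 px object and the tracked object currently spans 640 px:

#include <algorithm>
#include <iostream>

int main() {
  // Assumed tuning: RenderData `thickness` values were chosen for an object
  // about 200 px across, so multiplier = 1 / 200 = 0.005.
  const float multiplier = 0.005f;

  // Hypothetical current frame: the normalized rect is 0.5 x 0.3 of a
  // 1280 x 720 image, i.e. 640 px x 216 px.
  const float rect_width = 0.5f * 1280.0f;
  const float rect_height = 0.3f * 720.0f;

  // Same formula as RectToRenderScaleCalculator::Process().
  const float rect_size = std::max(rect_width, rect_height);  // 640 px
  const float render_scale = rect_size * multiplier;          // 3.2
  std::cout << render_scale << "\n";  // primitives drawn ~3.2x thicker
  return 0;
}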
- auto removed_ids = tracked_detection_manager_.AddDetection( - std::move(waiting_for_update_detectoin_ptr->second)); - MoveIds(removed_detection_ids.get(), std::move(removed_ids)); - - waiting_for_update_detections_.erase( - waiting_for_update_detectoin_ptr); - } - auto removed_ids = tracked_detection_manager_.UpdateDetectionLocation( - tracked_box.id(), bounding_box, tracked_box.time_msec()); + // Collect all detections that are removed. + auto removed_detection_ids = absl::make_unique>(); + for (const TimedBoxProto& tracked_box : tracked_boxes.box()) { + NormalizedRect bounding_box; + bounding_box.set_x_center((tracked_box.left() + tracked_box.right()) / + 2.f); + bounding_box.set_y_center((tracked_box.bottom() + tracked_box.top()) / + 2.f); + bounding_box.set_height(tracked_box.bottom() - tracked_box.top()); + bounding_box.set_width(tracked_box.right() - tracked_box.left()); + bounding_box.set_rotation(tracked_box.rotation()); + // First check if this box updates a detection that's waiting for + // update from the tracker. + auto waiting_for_update_detectoin_ptr = + waiting_for_update_detections_.find(tracked_box.id()); + if (waiting_for_update_detectoin_ptr != + waiting_for_update_detections_.end()) { + // Add the detection and remove duplicated detections. + auto removed_ids = tracked_detection_manager_.AddDetection( + std::move(waiting_for_update_detectoin_ptr->second)); MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + + waiting_for_update_detections_.erase(waiting_for_update_detectoin_ptr); } - // TODO: Should be handled automatically in detection manager. - auto removed_ids = tracked_detection_manager_.RemoveObsoleteDetections( - GetInputTimestampMs(cc) - kDetectionUpdateTimeOutMS); + auto removed_ids = tracked_detection_manager_.UpdateDetectionLocation( + tracked_box.id(), bounding_box, tracked_box.time_msec()); MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + } + // TODO: Should be handled automatically in detection manager. + auto removed_ids = tracked_detection_manager_.RemoveObsoleteDetections( + GetInputTimestampMs(cc) - kDetectionUpdateTimeOutMS); + MoveIds(removed_detection_ids.get(), std::move(removed_ids)); - // TODO: Should be handled automatically in detection manager. - removed_ids = tracked_detection_manager_.RemoveOutOfViewDetections(); - MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + // TODO: Should be handled automatically in detection manager. + removed_ids = tracked_detection_manager_.RemoveOutOfViewDetections(); + MoveIds(removed_detection_ids.get(), std::move(removed_ids)); - if (!removed_detection_ids->empty() && - cc->Outputs().HasTag(kCancelObjectIdTag)) { - auto timestamp = cc->InputTimestamp(); - for (int box_id : *removed_detection_ids) { - // The timestamp is incremented (by 1 us) because currently the box - // tracker calculator only accepts one cancel object ID for any given - // timestamp. - cc->Outputs() - .Tag(kCancelObjectIdTag) - .AddPacket(mediapipe::MakePacket(box_id).At(timestamp++)); - } - } - - // Output detections and corresponding bounding boxes. - const auto& all_detections = - tracked_detection_manager_.GetAllTrackedDetections(); - auto output_detections = absl::make_unique>(); - auto output_boxes = absl::make_unique>(); - - for (const auto& detection_ptr : all_detections) { - const auto& detection = *detection_ptr.second; - // Only output detections that are synced. 
- if (detection.last_updated_timestamp() < - cc->InputTimestamp().Microseconds() / 1000) { - continue; - } - output_detections->emplace_back( - GetAxisAlignedDetectionFromTrackedDetection(detection)); - output_boxes->emplace_back(detection.bounding_box()); - } - if (cc->Outputs().HasTag(kDetectionsTag)) { + if (!removed_detection_ids->empty() && + cc->Outputs().HasTag(kCancelObjectIdTag)) { + auto timestamp = cc->InputTimestamp(); + for (int box_id : *removed_detection_ids) { + // The timestamp is incremented (by 1 us) because currently the box + // tracker calculator only accepts one cancel object ID for any given + // timestamp. cc->Outputs() - .Tag(kDetectionsTag) - .Add(output_detections.release(), cc->InputTimestamp()); + .Tag(kCancelObjectIdTag) + .AddPacket(mediapipe::MakePacket(box_id).At(timestamp++)); } + } - if (cc->Outputs().HasTag(kDetectionBoxesTag)) { - cc->Outputs() - .Tag(kDetectionBoxesTag) - .Add(output_boxes.release(), cc->InputTimestamp()); + // Output detections and corresponding bounding boxes. + const auto& all_detections = + tracked_detection_manager_.GetAllTrackedDetections(); + auto output_detections = absl::make_unique>(); + auto output_boxes = absl::make_unique>(); + + for (const auto& detection_ptr : all_detections) { + const auto& detection = *detection_ptr.second; + // Only output detections that are synced. + if (detection.last_updated_timestamp() < + cc->InputTimestamp().Microseconds() / 1000) { + continue; } + output_detections->emplace_back( + GetAxisAlignedDetectionFromTrackedDetection(detection)); + output_boxes->emplace_back(detection.bounding_box()); + } + if (cc->Outputs().HasTag(kDetectionsTag)) { + cc->Outputs() + .Tag(kDetectionsTag) + .Add(output_detections.release(), cc->InputTimestamp()); + } + + if (cc->Outputs().HasTag(kDetectionBoxesTag)) { + cc->Outputs() + .Tag(kDetectionBoxesTag) + .Add(output_boxes.release(), cc->InputTimestamp()); } } diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD new file mode 100644 index 000000000..660382c5c --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/BUILD @@ -0,0 +1,62 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
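+
+# Android app target for the new upper-body pose tracking example. It reuses
+# the shared basic example scaffolding (basic_lib) and points it at the
+# upper_body_pose_tracking_gpu graph via the manifest_values below.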
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_gpu_deps", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +android_binary( + name = "upperbodyposetrackinggpu", + srcs = glob(["*.java"]), + assets = [ + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_gpu.binarypb", + "//mediapipe/modules/pose_landmark:pose_landmark_upper_body.tflite", + "//mediapipe/modules/pose_detection:pose_detection.tflite", + ], + assets_dir = "", + manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.apps.upperbodyposetrackinggpu", + "appName": "Upper Body Pose Tracking", + "mainActivity": ".MainActivity", + "cameraFacingFront": "False", + "binaryGraphName": "upper_body_pose_tracking_gpu.binarypb", + "inputVideoStreamName": "input_video", + "outputVideoStreamName": "output_video", + "flipFramesVertically": "True", + }, + multidex = "native", + deps = [ + ":mediapipe_jni_lib", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib", + "//mediapipe/framework/formats:landmark_java_proto_lite", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/MainActivity.java new file mode 100644 index 000000000..99a3a81ed --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/upperbodyposetrackinggpu/MainActivity.java @@ -0,0 +1,75 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.upperbodyposetrackinggpu; + +import android.os.Bundle; +import android.util.Log; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; +import com.google.mediapipe.framework.PacketGetter; +import com.google.protobuf.InvalidProtocolBufferException; + +/** Main activity of MediaPipe upper-body pose tracking app. 
*/ +public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { + private static final String TAG = "MainActivity"; + + private static final String OUTPUT_LANDMARKS_STREAM_NAME = "pose_landmarks"; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + + // To show verbose logging, run: + // adb shell setprop log.tag.MainActivity VERBOSE + if (Log.isLoggable(TAG, Log.VERBOSE)) { + processor.addPacketCallback( + OUTPUT_LANDMARKS_STREAM_NAME, + (packet) -> { + Log.v(TAG, "Received pose landmarks packet."); + try { + NormalizedLandmarkList poseLandmarks = + PacketGetter.getProto(packet, NormalizedLandmarkList.class); + Log.v( + TAG, + "[TS:" + + packet.getTimestamp() + + "] " + + getPoseLandmarksDebugString(poseLandmarks)); + } catch (InvalidProtocolBufferException exception) { + Log.e(TAG, "Failed to get proto.", exception); + } + }); + } + } + + private static String getPoseLandmarksDebugString(NormalizedLandmarkList poseLandmarks) { + String poseLandmarkStr = "Pose landmarks: " + poseLandmarks.getLandmarkCount() + "\n"; + int landmarkIndex = 0; + for (NormalizedLandmark landmark : poseLandmarks.getLandmarkList()) { + poseLandmarkStr += + "\tLandmark [" + + landmarkIndex + + "]: (" + + landmark.getX() + + ", " + + landmark.getY() + + ", " + + landmark.getZ() + + ")\n"; + ++landmarkIndex; + } + return poseLandmarkStr; + } +} diff --git a/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD b/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD new file mode 100644 index 000000000..9e32c5681 --- /dev/null +++ b/mediapipe/examples/desktop/upper_body_pose_tracking/BUILD @@ -0,0 +1,34 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
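The desktop binaries defined below reuse demo_run_graph_main, which only displays the output video stream. For logging the landmarks on desktop as well, a rough C++ counterpart of the Java packet callback above might look like the following sketch (assumes a CalculatorGraph named `graph` that exposes a `pose_landmarks` stream; this helper is hypothetical and not part of this patch):

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/logging.h"

// Call before graph->StartRun(): logs each NormalizedLandmarkList packet.
::mediapipe::Status ObservePoseLandmarks(mediapipe::CalculatorGraph* graph) {
  return graph->ObserveOutputStream(
      "pose_landmarks", [](const mediapipe::Packet& packet) {
        const auto& landmarks =
            packet.Get<mediapipe::NormalizedLandmarkList>();
        LOG(INFO) << "[TS:" << packet.Timestamp() << "] "
                  << landmarks.landmark_size() << " pose landmarks";
        return ::mediapipe::OkStatus();
      });
}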
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//mediapipe/examples:__subpackages__"]) + +cc_binary( + name = "upper_body_pose_tracking_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_cpu_deps", + ], +) + +# Linux only +cc_binary( + name = "upper_body_pose_tracking_gpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_gpu", + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_gpu_deps", + ], +) diff --git a/mediapipe/examples/ios/edgedetectiongpu/AppDelegate.h b/mediapipe/examples/ios/common/AppDelegate.h similarity index 100% rename from mediapipe/examples/ios/edgedetectiongpu/AppDelegate.h rename to mediapipe/examples/ios/common/AppDelegate.h diff --git a/mediapipe/examples/ios/facemeshgpu/AppDelegate.m b/mediapipe/examples/ios/common/AppDelegate.mm similarity index 97% rename from mediapipe/examples/ios/facemeshgpu/AppDelegate.m rename to mediapipe/examples/ios/common/AppDelegate.mm index 9e1b7ff0e..1746c2267 100644 --- a/mediapipe/examples/ios/facemeshgpu/AppDelegate.m +++ b/mediapipe/examples/ios/common/AppDelegate.mm @@ -14,6 +14,8 @@ #import "AppDelegate.h" +#import "mediapipe/examples/ios/common/CommonViewController.h" + @interface AppDelegate () @end diff --git a/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_1x.png b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_1x.png new file mode 100644 index 000000000..ed44f758e Binary files /dev/null and b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_1x.png differ diff --git a/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_2x.png b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_2x.png new file mode 100644 index 000000000..5855920a6 Binary files /dev/null and b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_2x.png differ diff --git a/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_3x.png b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_3x.png new file mode 100644 index 000000000..c96e2f0aa Binary files /dev/null and b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/40_c_3x.png differ diff --git a/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/60_c_iphone_2x.png b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/60_c_iphone_2x.png new file mode 100644 index 000000000..3037b13eb Binary files /dev/null and b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/60_c_iphone_2x.png differ diff --git a/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/60_c_iphone_3x.png b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/60_c_iphone_3x.png new file mode 100644 index 000000000..1e905ec27 Binary files /dev/null and b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/60_c_iphone_3x.png differ diff --git a/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/76_c_Ipad.png b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/76_c_Ipad.png new file mode 100644 index 000000000..d28effc39 Binary files /dev/null and b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/76_c_Ipad.png differ diff --git a/mediapipe/examples/ios/facedetectioncpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/Contents.json similarity index 87% rename from 
mediapipe/examples/ios/facedetectioncpu/Assets.xcassets/AppIcon.appiconset/Contents.json rename to mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/Contents.json index a1895a242..8ae934c76 100644 --- a/mediapipe/examples/ios/facedetectioncpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ b/mediapipe/examples/ios/common/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -23,21 +23,25 @@ { "idiom" : "iphone", "size" : "40x40", + "filename" : "40_c_2x.png", "scale" : "2x" }, { "idiom" : "iphone", "size" : "40x40", + "filename" : "40_c_3x.png", "scale" : "3x" }, { "idiom" : "iphone", "size" : "60x60", + "filename" : "60_c_iphone_2x.png", "scale" : "2x" }, { "idiom" : "iphone", "size" : "60x60", + "filename" : "60_c_iphone_3x.png", "scale" : "3x" }, { @@ -63,6 +67,7 @@ { "idiom" : "ipad", "size" : "40x40", + "filename" : "40_c_1x.png", "scale" : "1x" }, { @@ -73,6 +78,7 @@ { "idiom" : "ipad", "size" : "76x76", + "filename" : "76_c_Ipad.png", "scale" : "1x" }, { diff --git a/mediapipe/examples/ios/edgedetectiongpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/common/Assets.xcassets/Contents.json similarity index 100% rename from mediapipe/examples/ios/edgedetectiongpu/Assets.xcassets/Contents.json rename to mediapipe/examples/ios/common/Assets.xcassets/Contents.json diff --git a/mediapipe/examples/ios/common/BUILD b/mediapipe/examples/ios/common/BUILD new file mode 100644 index 000000000..0f3d34cd1 --- /dev/null +++ b/mediapipe/examples/ios/common/BUILD @@ -0,0 +1,52 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
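+
+# Shared scaffolding for the iOS example apps: a common AppDelegate, view
+# controller, storyboards and app icons. Each app supplies its graph, stream
+# names and camera settings through its own Info.plist.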
+ +licenses(["notice"]) # Apache 2.0 + +objc_library( + name = "CommonMediaPipeAppLibrary", + srcs = [ + "AppDelegate.mm", + "CommonViewController.mm", + "main.m", + ], + hdrs = [ + "AppDelegate.h", + "CommonViewController.h", + ], + data = [ + "Base.lproj/LaunchScreen.storyboard", + "Base.lproj/Main.storyboard", + ], + sdk_frameworks = [ + "AVFoundation", + "CoreGraphics", + "CoreMedia", + "UIKit", + ], + visibility = ["//mediapipe:__subpackages__"], + deps = [ + "//mediapipe/objc:mediapipe_framework_ios", + "//mediapipe/objc:mediapipe_input_sources_ios", + "//mediapipe/objc:mediapipe_layer_renderer", + ], +) + +exports_files(["Info.plist"]) + +filegroup( + name = "AppIcon", + srcs = glob(["Assets.xcassets/AppIcon.appiconset/*"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/common/Base.lproj/LaunchScreen.storyboard similarity index 100% rename from mediapipe/examples/ios/edgedetectiongpu/Base.lproj/LaunchScreen.storyboard rename to mediapipe/examples/ios/common/Base.lproj/LaunchScreen.storyboard diff --git a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/common/Base.lproj/Main.storyboard similarity index 85% rename from mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard rename to mediapipe/examples/ios/common/Base.lproj/Main.storyboard index 20845c12f..fcf71c0e2 100644 --- a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard +++ b/mediapipe/examples/ios/common/Base.lproj/Main.storyboard @@ -1,5 +1,5 @@ - + @@ -7,10 +7,10 @@ - + - + @@ -37,8 +37,8 @@ - - + + diff --git a/mediapipe/examples/ios/common/CommonViewController.h b/mediapipe/examples/ios/common/CommonViewController.h new file mode 100644 index 000000000..b4650423b --- /dev/null +++ b/mediapipe/examples/ios/common/CommonViewController.h @@ -0,0 +1,63 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/objc/MPPCameraInputSource.h" +#import "mediapipe/objc/MPPGraph.h" +#import "mediapipe/objc/MPPLayerRenderer.h" +#import "mediapipe/objc/MPPPlayerInputSource.h" + +typedef NS_ENUM(NSInteger, MediaPipeDemoSourceMode) { + MediaPipeDemoSourceCamera, + MediaPipeDemoSourceVideo +}; + +@interface CommonViewController : UIViewController + +// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in +// viewWillAppear: and sent video frames on videoQueue. +@property(nonatomic) MPPGraph* mediapipeGraph; + +// Handles camera access via AVCaptureSession library. +@property(nonatomic) MPPCameraInputSource* cameraSource; + +// Provides data from a video. +@property(nonatomic) MPPPlayerInputSource* videoSource; + +// The data source for the demo. +@property(nonatomic) MediaPipeDemoSourceMode sourceMode; + +// Inform the user when camera is unavailable. +@property(nonatomic) IBOutlet UILabel* noCameraLabel; + +// Display the camera preview frames. 
+@property(strong, nonatomic) IBOutlet UIView* liveView; + +// Render frames in a layer. +@property(nonatomic) MPPLayerRenderer* renderer; + +// Process camera frames on this queue. +@property(nonatomic) dispatch_queue_t videoQueue; + +// Graph name. +@property(nonatomic) NSString* graphName; + +// Graph input stream. +@property(nonatomic) const char* graphInputStream; + +// Graph output stream. +@property(nonatomic) const char* graphOutputStream; + +@end diff --git a/mediapipe/examples/ios/objectdetectiongpu/ViewController.mm b/mediapipe/examples/ios/common/CommonViewController.mm similarity index 54% rename from mediapipe/examples/ios/objectdetectiongpu/ViewController.mm rename to mediapipe/examples/ios/common/CommonViewController.mm index fc667d9d7..aa7eb5d57 100644 --- a/mediapipe/examples/ios/objectdetectiongpu/ViewController.mm +++ b/mediapipe/examples/ios/common/CommonViewController.mm @@ -12,46 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. -#import "ViewController.h" +#import "CommonViewController.h" -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" -#import "mediapipe/objc/MPPPlayerInputSource.h" - -static NSString* const kGraphName = @"mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; -@interface ViewController () +@implementation CommonViewController -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - MPPPlayerInputSource* _videoSource; - MediaPipeDemoSourceMode _sourceMode; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -- (void)setSourceMode:(MediaPipeDemoSourceMode)mode { - _sourceMode = mode; +// This provides a hook to replace the basic ViewController with a subclass when it's created from a +// storyboard, without having to change the storyboard itself. ++ (instancetype)allocWithZone:(struct _NSZone*)zone { + NSString* subclassName = [[NSBundle mainBundle] objectForInfoDictionaryKey:@"MainViewController"]; + if (subclassName.length > 0) { + Class customClass = NSClassFromString(subclassName); + Class baseClass = [CommonViewController class]; + NSAssert([customClass isSubclassOfClass:baseClass], @"%@ must be a subclass of %@", customClass, + baseClass); + if (self == baseClass) return [customClass allocWithZone:zone]; + } + return [super allocWithZone:zone]; } #pragma mark - Cleanup methods @@ -86,7 +64,6 @@ static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. 
MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; return newGraph; } @@ -95,19 +72,26 @@ static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - (void)viewDidLoad { [super viewDidLoad]; - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; + self.renderer = [[MPPLayerRenderer alloc] init]; + self.renderer.layer.frame = self.liveView.layer.bounds; + [self.liveView.layer addSublayer:self.renderer.layer]; + self.renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); + self.videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); + + self.graphName = [[NSBundle mainBundle] objectForInfoDictionaryKey:@"GraphName"]; + self.graphInputStream = + [[[NSBundle mainBundle] objectForInfoDictionaryKey:@"GraphInputStream"] UTF8String]; + self.graphOutputStream = + [[[NSBundle mainBundle] objectForInfoDictionaryKey:@"GraphOutputStream"] UTF8String]; + + self.mediapipeGraph = [[self class] loadGraphFromResource:self.graphName]; + [self.mediapipeGraph addFrameOutputStream:self.graphOutputStream + outputPacketType:MPPPacketTypePixelBuffer]; - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; } // In this application, there is only one ViewController which has no navigation to other view @@ -119,43 +103,77 @@ static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - (void)viewWillAppear:(BOOL)animated { [super viewWillAppear:animated]; + switch (self.sourceMode) { + case MediaPipeDemoSourceVideo: { + NSString* videoName = [[NSBundle mainBundle] objectForInfoDictionaryKey:@"VideoName"]; + AVAsset* video = [AVAsset assetWithURL:[[NSBundle mainBundle] URLForResource:videoName + withExtension:@"mov"]]; + self.videoSource = [[MPPPlayerInputSource alloc] initWithAVAsset:video]; + [self.videoSource setDelegate:self queue:self.videoQueue]; + dispatch_async(self.videoQueue, ^{ + [self.videoSource start]; + }); + break; + } + case MediaPipeDemoSourceCamera: { + self.cameraSource = [[MPPCameraInputSource alloc] init]; + [self.cameraSource setDelegate:self queue:self.videoQueue]; + self.cameraSource.sessionPreset = AVCaptureSessionPresetHigh; + + NSString* cameraPosition = + [[NSBundle mainBundle] objectForInfoDictionaryKey:@"CameraPosition"]; + if (cameraPosition.length > 0 && [cameraPosition isEqualToString:@"back"]) { + self.cameraSource.cameraPosition = AVCaptureDevicePositionBack; + } else { + self.cameraSource.cameraPosition = AVCaptureDevicePositionFront; + // When using the front camera, mirror the input for a more natural look. + _cameraSource.videoMirrored = YES; + } + + // The frame's native format is rotated with respect to the portrait orientation. 
+ _cameraSource.orientation = AVCaptureVideoOrientationPortrait; + + [self.cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { + if (granted) { + [self startGraphAndCamera]; + dispatch_async(dispatch_get_main_queue(), ^{ + self.noCameraLabel.hidden = YES; + }); + } + }]; + + break; + } + } +} + +- (void)startGraphAndCamera { // Start running self.mediapipeGraph. NSError* error; if (![self.mediapipeGraph startWithError:&error]) { NSLog(@"Failed to start graph: %@", error); } - switch (_sourceMode) { - case MediaPipeDemoSourceVideo: { - AVAsset* video = - [AVAsset assetWithURL:[[NSBundle mainBundle] URLForResource:@"object_detection" - withExtension:@"mov"]]; - _videoSource = [[MPPPlayerInputSource alloc] initWithAVAsset:video]; - [_videoSource setDelegate:self queue:_videoQueue]; - dispatch_async(_videoQueue, ^{ - [_videoSource start]; - }); - break; - } - case MediaPipeDemoSourceBackCamera: - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionBack; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; - break; + // Start fetching frames from the camera. + dispatch_async(self.videoQueue, ^{ + [self.cameraSource start]; + }); +} + +#pragma mark - MPPInputSourceDelegate methods + +// Must be invoked on self.videoQueue. +- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer + timestamp:(CMTime)timestamp + fromSource:(MPPInputSource*)source { + if (source != self.cameraSource && source != self.videoSource) { + NSLog(@"Unknown source: %@", source); + return; } + + [self.mediapipeGraph sendPixelBuffer:imageBuffer + intoStream:self.graphInputStream + packetType:MPPPacketTypePixelBuffer]; } #pragma mark - MPPGraphDelegate methods @@ -164,29 +182,14 @@ static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - (void)mediapipeGraph:(MPPGraph*)graph didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { + if (streamName == self.graphOutputStream) { // Display the captured image on the screen. CVPixelBufferRetain(pixelBuffer); dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; + [self.renderer renderPixelBuffer:pixelBuffer]; CVPixelBufferRelease(pixelBuffer); }); } } -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. 
-- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource && source != _videoSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - @end diff --git a/mediapipe/examples/ios/edgedetectiongpu/Info.plist b/mediapipe/examples/ios/common/Info.plist similarity index 95% rename from mediapipe/examples/ios/edgedetectiongpu/Info.plist rename to mediapipe/examples/ios/common/Info.plist index c7f7ec816..30db14c62 100644 --- a/mediapipe/examples/ios/edgedetectiongpu/Info.plist +++ b/mediapipe/examples/ios/common/Info.plist @@ -37,7 +37,6 @@ UISupportedInterfaceOrientations~ipad UIInterfaceOrientationPortrait - UIInterfaceOrientationPortraitUpsideDown diff --git a/mediapipe/examples/ios/edgedetectiongpu/main.m b/mediapipe/examples/ios/common/main.m similarity index 100% rename from mediapipe/examples/ios/edgedetectiongpu/main.m rename to mediapipe/examples/ios/common/main.m diff --git a/mediapipe/examples/ios/edgedetectiongpu/AppDelegate.m b/mediapipe/examples/ios/edgedetectiongpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/edgedetectiongpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. 
-} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. -} - -@end diff --git a/mediapipe/examples/ios/edgedetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/edgedetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/edgedetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/edgedetectiongpu/ViewController.h b/mediapipe/examples/ios/edgedetectiongpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/edgedetectiongpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/edgedetectiongpu/ViewController.mm b/mediapipe/examples/ios/edgedetectiongpu/ViewController.mm deleted file mode 100644 index 371ddeb3f..000000000 --- a/mediapipe/examples/ios/edgedetectiongpu/ViewController.mm +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -static NSString* const kGraphName = @"mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. 
- MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionBack; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - [_noCameraLabel setHidden:YES]; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. 
-- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/facedetectioncpu/AppDelegate.m b/mediapipe/examples/ios/facedetectioncpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. 
-} - -@end diff --git a/mediapipe/examples/ios/facedetectioncpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/facedetectioncpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/facedetectioncpu/BUILD b/mediapipe/examples/ios/facedetectioncpu/BUILD index 0387ae8a4..69d54d72b 100644 --- a/mediapipe/examples/ios/facedetectioncpu/BUILD +++ b/mediapipe/examples/ios/facedetectioncpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "FaceDetectionCpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".FaceDetectionCpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -49,32 +53,13 @@ ios_application( objc_library( name = "FaceDetectionCpuAppLibrary", - srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", - ], - hdrs = [ - "AppDelegate.h", - "ViewController.h", - ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/face_detection:mobile_cpu_binary_graph", "//mediapipe/models:face_detection_front.tflite", "//mediapipe/models:face_detection_front_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/facedetectioncpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/facedetectioncpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facedetectioncpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/facedetectioncpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facedetectioncpu/Info.plist b/mediapipe/examples/ios/facedetectioncpu/Info.plist index 30db14c62..d1738a5c7 100644 --- a/mediapipe/examples/ios/facedetectioncpu/Info.plist +++ b/mediapipe/examples/ios/facedetectioncpu/Info.plist @@ -2,41 +2,13 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. 
- CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + mobile_cpu diff --git a/mediapipe/examples/ios/facedetectioncpu/ViewController.h b/mediapipe/examples/ios/facedetectioncpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/facedetectioncpu/ViewController.mm b/mediapipe/examples/ios/facedetectioncpu/ViewController.mm deleted file mode 100644 index b212730a8..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/ViewController.mm +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -static NSString* const kGraphName = @"mobile_cpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. 
- MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. - MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. 
-- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - [_noCameraLabel setHidden:YES]; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. -- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/facedetectioncpu/main.m b/mediapipe/examples/ios/facedetectioncpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/facedetectioncpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/facedetectiongpu/AppDelegate.m b/mediapipe/examples/ios/facedetectiongpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. 
-} - -@end diff --git a/mediapipe/examples/ios/facedetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/facedetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/facedetectiongpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/facedetectiongpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/facedetectiongpu/BUILD b/mediapipe/examples/ios/facedetectiongpu/BUILD index 87f0d7894..b06a0a077 100644 --- a/mediapipe/examples/ios/facedetectiongpu/BUILD +++ b/mediapipe/examples/ios/facedetectiongpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "FaceDetectionGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".FaceDetectionGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -49,32 +53,13 @@ ios_application( objc_library( name = "FaceDetectionGpuAppLibrary", - srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", - ], - hdrs = [ - "AppDelegate.h", - "ViewController.h", - ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/face_detection:mobile_gpu_binary_graph", "//mediapipe/models:face_detection_front.tflite", "//mediapipe/models:face_detection_front_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], 
"//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/facedetectiongpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/facedetectiongpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facedetectiongpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/facedetectiongpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facedetectiongpu/Info.plist b/mediapipe/examples/ios/facedetectiongpu/Info.plist index 30db14c62..6b4790734 100644 --- a/mediapipe/examples/ios/facedetectiongpu/Info.plist +++ b/mediapipe/examples/ios/facedetectiongpu/Info.plist @@ -2,41 +2,13 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + mobile_gpu diff --git a/mediapipe/examples/ios/facedetectiongpu/ViewController.h b/mediapipe/examples/ios/facedetectiongpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/facedetectiongpu/ViewController.mm b/mediapipe/examples/ios/facedetectiongpu/ViewController.mm deleted file mode 100644 index 1e1b46ac7..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/ViewController.mm +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -static NSString* const kGraphName = @"mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. 
- MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - [_noCameraLabel setHidden:YES]; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. 
-- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/facedetectiongpu/main.m b/mediapipe/examples/ios/facedetectiongpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/facedetectiongpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/facemeshgpu/BUILD b/mediapipe/examples/ios/facemeshgpu/BUILD index b1e169bf7..dbe842285 100644 --- 
a/mediapipe/examples/ios/facemeshgpu/BUILD +++ b/mediapipe/examples/ios/facemeshgpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "FaceMeshGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".FaceMeshGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -50,31 +54,18 @@ ios_application( objc_library( name = "FaceMeshGpuAppLibrary", srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", + "FaceMeshGpuViewController.mm", ], hdrs = [ - "AppDelegate.h", - "ViewController.h", + "FaceMeshGpuViewController.h", ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph", "//mediapipe/modules/face_detection:face_detection_front.tflite", "//mediapipe/modules/face_landmark:face_landmark.tflite", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/facemeshgpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/facemeshgpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/facemeshgpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facemeshgpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/facemeshgpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/facemeshgpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facedetectiongpu/AppDelegate.h b/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.h similarity index 83% rename from mediapipe/examples/ios/facedetectiongpu/AppDelegate.h rename to mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.h index 6b0377ef2..520940f59 100644 --- a/mediapipe/examples/ios/facedetectiongpu/AppDelegate.h +++ b/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.h @@ -14,8 +14,8 @@ #import -@interface AppDelegate : UIResponder +#import "mediapipe/examples/ios/common/CommonViewController.h" -@property(strong, nonatomic) UIWindow *window; +@interface FaceMeshGpuViewController : CommonViewController @end diff --git a/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.mm b/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.mm new file mode 100644 index 000000000..6f11be42b --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/FaceMeshGpuViewController.mm @@ -0,0 +1,65 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "FaceMeshGpuViewController.h"
+
+#include "mediapipe/framework/formats/landmark.pb.h"
+
+static NSString* const kGraphName = @"face_mesh_mobile_gpu";
+
+static const char* kNumFacesInputSidePacket = "num_faces";
+static const char* kLandmarksOutputStream = "multi_face_landmarks";
+
+// Max number of faces to detect/process.
+static const int kNumFaces = 1;
+
+@implementation FaceMeshGpuViewController
+
+#pragma mark - UIViewController methods
+
+- (void)viewDidLoad {
+  [super viewDidLoad];
+
+  [self.mediapipeGraph setSidePacket:(mediapipe::MakePacket<int>(kNumFaces))
+                               named:kNumFacesInputSidePacket];
+  [self.mediapipeGraph addFrameOutputStream:kLandmarksOutputStream
+                           outputPacketType:MPPPacketTypeRaw];
+}
+
+#pragma mark - MPPGraphDelegate methods
+
+// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread.
+- (void)mediapipeGraph:(MPPGraph*)graph
+       didOutputPacket:(const ::mediapipe::Packet&)packet
+            fromStream:(const std::string&)streamName {
+  if (streamName == kLandmarksOutputStream) {
+    if (packet.IsEmpty()) {
+      NSLog(@"[TS:%lld] No face landmarks", packet.Timestamp().Value());
+      return;
+    }
+    const auto& multi_face_landmarks =
+        packet.Get<std::vector<::mediapipe::NormalizedLandmarkList>>();
+    NSLog(@"[TS:%lld] Number of face instances with landmarks: %lu", packet.Timestamp().Value(),
+          multi_face_landmarks.size());
+    for (int face_index = 0; face_index < multi_face_landmarks.size(); ++face_index) {
+      const auto& landmarks = multi_face_landmarks[face_index];
+      NSLog(@"\tNumber of landmarks for face[%d]: %d", face_index, landmarks.landmark_size());
+      for (int i = 0; i < landmarks.landmark_size(); ++i) {
+        NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(),
+              landmarks.landmark(i).y(), landmarks.landmark(i).z());
+      }
+    }
+  }
+}
+
+@end
diff --git a/mediapipe/examples/ios/facemeshgpu/Info.plist b/mediapipe/examples/ios/facemeshgpu/Info.plist
index 30db14c62..2684c8cab 100644
--- a/mediapipe/examples/ios/facemeshgpu/Info.plist
+++ b/mediapipe/examples/ios/facemeshgpu/Info.plist
@@ -2,41 +2,15 @@
-	<key>NSCameraUsageDescription</key>
-	<string>This app uses the camera to demonstrate live video processing.</string>
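FaceMeshGpuViewController above shows the pattern this refactor establishes: an example-specific controller subclasses the shared CommonViewController and adds only its own side packets, extra output streams, and packet handling, while camera setup, graph loading, and rendering stay in the common code. As a rough sketch of the same pattern for some other landmark-producing graph (the class name and stream name below are hypothetical and not part of this change):

#import "mediapipe/examples/ios/common/CommonViewController.h"

#include "mediapipe/framework/formats/landmark.pb.h"

// Hypothetical stream name; a real graph would define its own output tag.
static const char* kExampleLandmarksOutputStream = "landmarks";

@interface ExampleLandmarksViewController : CommonViewController
@end

@implementation ExampleLandmarksViewController

- (void)viewDidLoad {
  [super viewDidLoad];
  // The shared controller owns self.mediapipeGraph; only register the extra stream here.
  [self.mediapipeGraph addFrameOutputStream:kExampleLandmarksOutputStream
                           outputPacketType:MPPPacketTypeRaw];
}

// Invoked on a MediaPipe worker thread, mirroring the face mesh controller above.
- (void)mediapipeGraph:(MPPGraph*)graph
       didOutputPacket:(const ::mediapipe::Packet&)packet
            fromStream:(const std::string&)streamName {
  if (streamName == kExampleLandmarksOutputStream && !packet.IsEmpty()) {
    const auto& landmarks = packet.Get<::mediapipe::NormalizedLandmarkList>();
    NSLog(@"[TS:%lld] Received %d landmarks", packet.Timestamp().Value(),
          landmarks.landmark_size());
  }
}

@end

The per-example Info.plist then points MainViewController at the subclass, as the face mesh plist does immediately below.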
- CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + MainViewController + FaceMeshGpuViewController + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + face_mesh_mobile_gpu diff --git a/mediapipe/examples/ios/facemeshgpu/ViewController.h b/mediapipe/examples/ios/facemeshgpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/facemeshgpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/facemeshgpu/ViewController.mm b/mediapipe/examples/ios/facemeshgpu/ViewController.mm deleted file mode 100644 index 1071b1708..000000000 --- a/mediapipe/examples/ios/facemeshgpu/ViewController.mm +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -#include "mediapipe/framework/formats/landmark.pb.h" - -static NSString* const kGraphName = @"face_mesh_mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kNumFacesInputSidePacket = "num_faces"; -static const char* kOutputStream = "output_video"; -static const char* kLandmarksOutputStream = "multi_face_landmarks"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -// Max number of faces to detect/process. -static const int kNumFaces = 1; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. 
- MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. - MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; - [newGraph setSidePacket:(mediapipe::MakePacket(kNumFaces)) named:kNumFacesInputSidePacket]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. 
If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPacket:(const ::mediapipe::Packet&)packet - fromStream:(const std::string&)streamName { - if (streamName == kLandmarksOutputStream) { - if (packet.IsEmpty()) { - NSLog(@"[TS:%lld] No face landmarks", packet.Timestamp().Value()); - return; - } - const auto& multi_face_landmarks = packet.Get>(); - NSLog(@"[TS:%lld] Number of face instances with landmarks: %lu", packet.Timestamp().Value(), - multi_face_landmarks.size()); - for (int face_index = 0; face_index < multi_face_landmarks.size(); ++face_index) { - const auto& landmarks = multi_face_landmarks[face_index]; - NSLog(@"\tNumber of landmarks for face[%d]: %d", face_index, landmarks.landmark_size()); - for (int i = 0; i < landmarks.landmark_size(); ++i) { - NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), - landmarks.landmark(i).y(), landmarks.landmark(i).z()); - } - } - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. -- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/facemeshgpu/main.m b/mediapipe/examples/ios/facemeshgpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/facemeshgpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/handdetectiongpu/AppDelegate.h b/mediapipe/examples/ios/handdetectiongpu/AppDelegate.h deleted file mode 100644 index 6b0377ef2..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/AppDelegate.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface AppDelegate : UIResponder - -@property(strong, nonatomic) UIWindow *window; - -@end diff --git a/mediapipe/examples/ios/handdetectiongpu/AppDelegate.m b/mediapipe/examples/ios/handdetectiongpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. 
-} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. -} - -@end diff --git a/mediapipe/examples/ios/handdetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/handdetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/handdetectiongpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/handdetectiongpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/handdetectiongpu/BUILD b/mediapipe/examples/ios/handdetectiongpu/BUILD index 9507e81cc..953f80719 100644 --- a/mediapipe/examples/ios/handdetectiongpu/BUILD +++ b/mediapipe/examples/ios/handdetectiongpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "HandDetectionGpuApp", + app_icons = 
["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".HandDetectionGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -49,32 +53,13 @@ ios_application( objc_library( name = "HandDetectionGpuAppLibrary", - srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", - ], - hdrs = [ - "AppDelegate.h", - "ViewController.h", - ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu_binary_graph", "//mediapipe/models:palm_detection.tflite", "//mediapipe/models:palm_detection_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/handdetectiongpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/handdetectiongpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/handdetectiongpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/handdetectiongpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/handdetectiongpu/Info.plist b/mediapipe/examples/ios/handdetectiongpu/Info.plist index 30db14c62..937a29569 100644 --- a/mediapipe/examples/ios/handdetectiongpu/Info.plist +++ b/mediapipe/examples/ios/handdetectiongpu/Info.plist @@ -2,41 +2,13 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + hand_detection_mobile_gpu diff --git a/mediapipe/examples/ios/handdetectiongpu/ViewController.h b/mediapipe/examples/ios/handdetectiongpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/handdetectiongpu/ViewController.mm b/mediapipe/examples/ios/handdetectiongpu/ViewController.mm deleted file mode 100644 index fd2bda974..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/ViewController.mm +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -static NSString* const kGraphName = @"hand_detection_mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. 
- mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. - MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. 
-- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/handdetectiongpu/main.m b/mediapipe/examples/ios/handdetectiongpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/handdetectiongpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/handtrackinggpu/AppDelegate.h b/mediapipe/examples/ios/handtrackinggpu/AppDelegate.h deleted file mode 100644 index 6b0377ef2..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/AppDelegate.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface AppDelegate : UIResponder - -@property(strong, nonatomic) UIWindow *window; - -@end diff --git a/mediapipe/examples/ios/handtrackinggpu/AppDelegate.m b/mediapipe/examples/ios/handtrackinggpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. 
- return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. -} - -@end diff --git a/mediapipe/examples/ios/handtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/handtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/handtrackinggpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/handtrackinggpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 
--- a/mediapipe/examples/ios/handtrackinggpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/handtrackinggpu/BUILD b/mediapipe/examples/ios/handtrackinggpu/BUILD index bfccddd04..162fa2e7d 100644 --- a/mediapipe/examples/ios/handtrackinggpu/BUILD +++ b/mediapipe/examples/ios/handtrackinggpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "HandTrackingGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".HandTrackingGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -50,33 +54,20 @@ ios_application( objc_library( name = "HandTrackingGpuAppLibrary", srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", + "HandTrackingViewController.mm", ], hdrs = [ - "AppDelegate.h", - "ViewController.h", + "HandTrackingViewController.h", ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu_binary_graph", "//mediapipe/models:hand_landmark.tflite", "//mediapipe/models:handedness.txt", "//mediapipe/models:palm_detection.tflite", "//mediapipe/models:palm_detection_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/handtrackinggpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/handtrackinggpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/handtrackinggpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/handtrackinggpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/facemeshgpu/AppDelegate.h b/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.h similarity index 83% rename from mediapipe/examples/ios/facemeshgpu/AppDelegate.h rename to mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.h index 6b0377ef2..a3d521f22 100644 --- a/mediapipe/examples/ios/facemeshgpu/AppDelegate.h +++ b/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.h @@ -14,8 +14,8 @@ #import -@interface AppDelegate : UIResponder +#import "mediapipe/examples/ios/common/CommonViewController.h" -@property(strong, nonatomic) UIWindow *window; +@interface HandTrackingViewController : CommonViewController @end diff --git a/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.mm b/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.mm new file mode 100644 index 000000000..491d65459 --- /dev/null +++ 
b/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.mm @@ -0,0 +1,53 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "HandTrackingViewController.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +static const char* kLandmarksOutputStream = "hand_landmarks"; + +@implementation HandTrackingViewController + +#pragma mark - UIViewController methods + +- (void)viewDidLoad { + [super viewDidLoad]; + + [self.mediapipeGraph addFrameOutputStream:kLandmarksOutputStream + outputPacketType:MPPPacketTypeRaw]; +} + +#pragma mark - MPPGraphDelegate methods + +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. +- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPacket:(const ::mediapipe::Packet&)packet + fromStream:(const std::string&)streamName { + if (streamName == kLandmarksOutputStream) { + if (packet.IsEmpty()) { + NSLog(@"[TS:%lld] No hand landmarks", packet.Timestamp().Value()); + return; + } + const auto& landmarks = packet.Get<::mediapipe::NormalizedLandmarkList>(); + NSLog(@"[TS:%lld] Number of landmarks on hand: %d", packet.Timestamp().Value(), + landmarks.landmark_size()); + for (int i = 0; i < landmarks.landmark_size(); ++i) { + NSLog(@"\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), + landmarks.landmark(i).y(), landmarks.landmark(i).z()); + } + } +} + +@end diff --git a/mediapipe/examples/ios/handtrackinggpu/Info.plist b/mediapipe/examples/ios/handtrackinggpu/Info.plist index 30db14c62..a5eebbefa 100644 --- a/mediapipe/examples/ios/handtrackinggpu/Info.plist +++ b/mediapipe/examples/ios/handtrackinggpu/Info.plist @@ -2,41 +2,15 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + MainViewController + HandTrackingViewController + GraphInputStream + input_video + GraphOutputStream + output_video + GraphName + hand_tracking_mobile_gpu diff --git a/mediapipe/examples/ios/handtrackinggpu/ViewController.h b/mediapipe/examples/ios/handtrackinggpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/handtrackinggpu/ViewController.mm b/mediapipe/examples/ios/handtrackinggpu/ViewController.mm deleted file mode 100644 index fd50aec39..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/ViewController.mm +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -#include "mediapipe/framework/formats/landmark.pb.h" - -static NSString* const kGraphName = @"hand_tracking_mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kLandmarksOutputStream = "hand_landmarks"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. 
- mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. - MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. 
-- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPacket:(const ::mediapipe::Packet&)packet - fromStream:(const std::string&)streamName { - if (streamName == kLandmarksOutputStream) { - if (packet.IsEmpty()) { - NSLog(@"[TS:%lld] No hand landmarks", packet.Timestamp().Value()); - return; - } - const auto& landmarks = packet.Get<::mediapipe::NormalizedLandmarkList>(); - NSLog(@"[TS:%lld] Number of landmarks on hand: %d", packet.Timestamp().Value(), - landmarks.landmark_size()); - for (int i = 0; i < landmarks.landmark_size(); ++i) { - NSLog(@"\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), - landmarks.landmark(i).y(), landmarks.landmark(i).z()); - } - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. -- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/handtrackinggpu/main.m b/mediapipe/examples/ios/handtrackinggpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/handtrackinggpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import <UIKit/UIKit.h> -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/edgedetectiongpu/BUILD b/mediapipe/examples/ios/helloworld/BUILD similarity index 60% rename from mediapipe/examples/ios/edgedetectiongpu/BUILD rename to mediapipe/examples/ios/helloworld/BUILD index 46fb32a94..b1916117d 100644 --- a/mediapipe/examples/ios/edgedetectiongpu/BUILD +++ b/mediapipe/examples/ios/helloworld/BUILD @@ -27,49 +27,34 @@ licenses(["notice"]) # Apache 2.0 MIN_IOS_VERSION = "10.0" alias( - name = "edgedetectiongpu", - actual = "EdgeDetectionGpuApp", + name = "helloworld", + actual = "HelloWorldApp", ) ios_application( - name = "EdgeDetectionGpuApp", - bundle_id = BUNDLE_ID_PREFIX + ".EdgeDetectionGpu", + name = "HelloWorldApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], + bundle_id = BUNDLE_ID_PREFIX + ".HelloWorld", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), - deps = [":EdgeDetectionGpuAppLibrary"], + deps = [":HelloWorldAppLibrary"], ) objc_library( - name = "EdgeDetectionGpuAppLibrary", - srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", - ], - hdrs = [ - "AppDelegate.h", - "ViewController.h", - ], + name = "HelloWorldAppLibrary", data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/edge_detection:mobile_gpu_binary_graph", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", "//mediapipe/graphs/edge_detection:mobile_calculators", - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", ], ) diff --git a/mediapipe/examples/ios/helloworld/Info.plist b/mediapipe/examples/ios/helloworld/Info.plist new file mode 100644 index 000000000..7e792c9b4 --- /dev/null +++ b/mediapipe/examples/ios/helloworld/Info.plist @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CameraPosition</key> + <string>back</string> + <key>GraphName</key> + <string>mobile_gpu</string> + <key>GraphOutputStream</key> + <string>output_video</string> + <key>GraphInputStream</key> + <string>input_video</string> +</dict> +</plist> diff --git a/mediapipe/examples/ios/iristrackinggpu/AppDelegate.h b/mediapipe/examples/ios/iristrackinggpu/AppDelegate.h deleted file mode 100644 index 6b0377ef2..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/AppDelegate.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -#import - -@interface AppDelegate : UIResponder - -@property(strong, nonatomic) UIWindow *window; - -@end diff --git a/mediapipe/examples/ios/iristrackinggpu/AppDelegate.m b/mediapipe/examples/ios/iristrackinggpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. 
-} - -@end diff --git a/mediapipe/examples/ios/iristrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/iristrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/iristrackinggpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/iristrackinggpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/iristrackinggpu/BUILD b/mediapipe/examples/ios/iristrackinggpu/BUILD index e6c5fcb31..c5d039236 100644 --- a/mediapipe/examples/ios/iristrackinggpu/BUILD +++ b/mediapipe/examples/ios/iristrackinggpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "IrisTrackingGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".IrisTrackingGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -50,32 +54,19 @@ ios_application( objc_library( name = "IrisTrackingGpuAppLibrary", srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", + "IrisTrackingViewController.mm", ], hdrs = [ - "AppDelegate.h", - "ViewController.h", + "IrisTrackingViewController.h", ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb", "//mediapipe/modules/face_detection:face_detection_front.tflite", "//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/iris_landmark:iris_landmark.tflite", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - 
"//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/iristrackinggpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/iristrackinggpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/iristrackinggpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/iristrackinggpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/iristrackinggpu/Info.plist b/mediapipe/examples/ios/iristrackinggpu/Info.plist index 30db14c62..aadef04ae 100644 --- a/mediapipe/examples/ios/iristrackinggpu/Info.plist +++ b/mediapipe/examples/ios/iristrackinggpu/Info.plist @@ -2,41 +2,15 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + MainViewController + IrisTrackingViewController + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + iris_tracking_gpu diff --git a/mediapipe/examples/ios/facedetectioncpu/AppDelegate.h b/mediapipe/examples/ios/iristrackinggpu/IrisTrackingViewController.h similarity index 83% rename from mediapipe/examples/ios/facedetectioncpu/AppDelegate.h rename to mediapipe/examples/ios/iristrackinggpu/IrisTrackingViewController.h index 6b0377ef2..ce133f330 100644 --- a/mediapipe/examples/ios/facedetectioncpu/AppDelegate.h +++ b/mediapipe/examples/ios/iristrackinggpu/IrisTrackingViewController.h @@ -14,8 +14,8 @@ #import -@interface AppDelegate : UIResponder +#import "mediapipe/examples/ios/common/CommonViewController.h" -@property(strong, nonatomic) UIWindow *window; +@interface IrisTrackingViewController : CommonViewController @end diff --git a/mediapipe/examples/ios/iristrackinggpu/IrisTrackingViewController.mm b/mediapipe/examples/ios/iristrackinggpu/IrisTrackingViewController.mm new file mode 100644 index 000000000..52687efa9 --- /dev/null +++ b/mediapipe/examples/ios/iristrackinggpu/IrisTrackingViewController.mm @@ -0,0 +1,83 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "IrisTrackingViewController.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +static const char* kLandmarksOutputStream = "iris_landmarks"; + +@implementation IrisTrackingViewController { + /// Input side packet for focal length parameter. + std::map<std::string, mediapipe::Packet> _input_side_packets; + mediapipe::Packet _focal_length_side_packet; +} + +#pragma mark - UIViewController methods + +- (void)viewDidLoad { + [super viewDidLoad]; + + [self.mediapipeGraph addFrameOutputStream:kLandmarksOutputStream + outputPacketType:MPPPacketTypeRaw]; + _focal_length_side_packet = + mediapipe::MakePacket<std::unique_ptr<float>>(absl::make_unique<float>(0.0)); + _input_side_packets = { + {"focal_length_pixel", _focal_length_side_packet}, + }; + [self.mediapipeGraph addSidePackets:_input_side_packets]; +} + +#pragma mark - MPPGraphDelegate methods + +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. +- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPacket:(const ::mediapipe::Packet&)packet + fromStream:(const std::string&)streamName { + if (streamName == kLandmarksOutputStream) { + if (packet.IsEmpty()) { + NSLog(@"[TS:%lld] No iris landmarks", packet.Timestamp().Value()); + return; + } + const auto& landmarks = packet.Get<::mediapipe::NormalizedLandmarkList>(); + NSLog(@"[TS:%lld] Number of landmarks on iris: %d", packet.Timestamp().Value(), + landmarks.landmark_size()); + for (int i = 0; i < landmarks.landmark_size(); ++i) { + NSLog(@"\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), + landmarks.landmark(i).y(), landmarks.landmark(i).z()); + } + } +} + +#pragma mark - MPPInputSourceDelegate methods + +// Must be invoked on _videoQueue. +- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer + timestamp:(CMTime)timestamp + fromSource:(MPPInputSource*)source { + if (source != self.cameraSource) { + NSLog(@"Unknown source: %@", source); + return; + } + + // TODO: This is a temporary solution. Need to verify whether the focal length is + // constant. In that case, we need to use input stream instead of using side packet. + *(_input_side_packets["focal_length_pixel"].Get<std::unique_ptr<float>>()) = + self.cameraSource.cameraIntrinsicMatrix.columns[0][0]; + [self.mediapipeGraph sendPixelBuffer:imageBuffer + intoStream:self.graphInputStream + packetType:MPPPacketTypePixelBuffer]; +} + +@end diff --git a/mediapipe/examples/ios/iristrackinggpu/ViewController.h b/mediapipe/examples/ios/iristrackinggpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/iristrackinggpu/ViewController.mm b/mediapipe/examples/ios/iristrackinggpu/ViewController.mm deleted file mode 100644 index 29aa74210..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/ViewController.mm +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -#include "mediapipe/framework/formats/landmark.pb.h" - -static NSString* const kGraphName = @"iris_tracking_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kLandmarksOutputStream = "iris_landmarks"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - /// Input side packet for focal length parameter. - std::map _input_side_packets; - mediapipe::Packet _focal_length_side_packet; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. 
- MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; - - _focal_length_side_packet = - mediapipe::MakePacket>(absl::make_unique(0.0)); - _input_side_packets = { - {"focal_length_pixel", _focal_length_side_packet}, - }; - [self.mediapipeGraph addSidePackets:_input_side_packets]; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -// Receives a raw packet from the MediaPipe graph. 
Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPacket:(const ::mediapipe::Packet&)packet - fromStream:(const std::string&)streamName { - if (streamName == kLandmarksOutputStream) { - if (packet.IsEmpty()) { - NSLog(@"[TS:%lld] No iris landmarks", packet.Timestamp().Value()); - return; - } - const auto& landmarks = packet.Get<::mediapipe::NormalizedLandmarkList>(); - NSLog(@"[TS:%lld] Number of landmarks on iris: %d", packet.Timestamp().Value(), - landmarks.landmark_size()); - for (int i = 0; i < landmarks.landmark_size(); ++i) { - NSLog(@"\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), - landmarks.landmark(i).y(), landmarks.landmark(i).z()); - } - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. -- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - - // TODO: This is a temporary solution. Need to verify whether the focal length is - // constant. In that case, we need to use input stream instead of using side packet. - *(_input_side_packets["focal_length_pixel"].Get>()) = - _cameraSource.cameraIntrinsicMatrix.columns[0][0]; - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/iristrackinggpu/main.m b/mediapipe/examples/ios/iristrackinggpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/iristrackinggpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.h b/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.h deleted file mode 100644 index 6b0377ef2..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import - -@interface AppDelegate : UIResponder - -@property(strong, nonatomic) UIWindow *window; - -@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.m b/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. 
-} - -@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/multihandtrackinggpu/BUILD b/mediapipe/examples/ios/multihandtrackinggpu/BUILD index cadc390c9..404a50d9f 100644 --- a/mediapipe/examples/ios/multihandtrackinggpu/BUILD +++ b/mediapipe/examples/ios/multihandtrackinggpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "MultiHandTrackingGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".MultiHandTrackingGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -50,33 +54,20 @@ ios_application( objc_library( name = "MultiHandTrackingGpuAppLibrary", srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", + "MultiHandTrackingViewController.mm", ], hdrs = [ - "AppDelegate.h", - "ViewController.h", + "MultiHandTrackingViewController.h", ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/hand_tracking:multi_hand_tracking_mobile_gpu_binary_graph", "//mediapipe/models:hand_landmark.tflite", "//mediapipe/models:handedness.txt", "//mediapipe/models:palm_detection.tflite", "//mediapipe/models:palm_detection_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", 
- "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ "//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Info.plist b/mediapipe/examples/ios/multihandtrackinggpu/Info.plist index 30db14c62..46e3fbd3d 100644 --- a/mediapipe/examples/ios/multihandtrackinggpu/Info.plist +++ b/mediapipe/examples/ios/multihandtrackinggpu/Info.plist @@ -2,41 +2,15 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + front + MainViewController + MultiHandTrackingViewController + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + multi_hand_tracking_mobile_gpu diff --git a/mediapipe/examples/ios/multihandtrackinggpu/MultiHandTrackingViewController.h b/mediapipe/examples/ios/multihandtrackinggpu/MultiHandTrackingViewController.h new file mode 100644 index 000000000..17ea6feeb --- /dev/null +++ b/mediapipe/examples/ios/multihandtrackinggpu/MultiHandTrackingViewController.h @@ -0,0 +1,21 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import + +#import "mediapipe/examples/ios/common/CommonViewController.h" + +@interface MultiHandTrackingViewController : CommonViewController + +@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/MultiHandTrackingViewController.mm b/mediapipe/examples/ios/multihandtrackinggpu/MultiHandTrackingViewController.mm new file mode 100644 index 000000000..6c1deb7da --- /dev/null +++ b/mediapipe/examples/ios/multihandtrackinggpu/MultiHandTrackingViewController.mm @@ -0,0 +1,57 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "MultiHandTrackingViewController.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +static const char* kLandmarksOutputStream = "multi_hand_landmarks"; + +@implementation MultiHandTrackingViewController + +#pragma mark - UIViewController methods + +- (void)viewDidLoad { + [super viewDidLoad]; + + [self.mediapipeGraph addFrameOutputStream:kLandmarksOutputStream + outputPacketType:MPPPacketTypeRaw]; +} + +#pragma mark - MPPGraphDelegate methods + +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. +- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPacket:(const ::mediapipe::Packet&)packet + fromStream:(const std::string&)streamName { + if (streamName == kLandmarksOutputStream) { + if (packet.IsEmpty()) { + NSLog(@"[TS:%lld] No hand landmarks", packet.Timestamp().Value()); + return; + } + const auto& multi_hand_landmarks = packet.Get>(); + NSLog(@"[TS:%lld] Number of hand instances with landmarks: %lu", packet.Timestamp().Value(), + multi_hand_landmarks.size()); + for (int hand_index = 0; hand_index < multi_hand_landmarks.size(); ++hand_index) { + const auto& landmarks = multi_hand_landmarks[hand_index]; + NSLog(@"\tNumber of landmarks for hand[%d]: %d", hand_index, landmarks.landmark_size()); + for (int i = 0; i < landmarks.landmark_size(); ++i) { + NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), + landmarks.landmark(i).y(), landmarks.landmark(i).z()); + } + } + } +} + +@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.h b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm deleted file mode 100644 index f027a0372..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -#include "mediapipe/framework/formats/landmark.pb.h" - -static NSString* const kGraphName = @"multi_hand_tracking_mobile_gpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kLandmarksOutputStream = "multi_hand_landmarks"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. 
- MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionFront; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - // When using the front camera, mirror the input for a more natural look. - _cameraSource.videoMirrored = YES; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. 
-- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPacket:(const ::mediapipe::Packet&)packet - fromStream:(const std::string&)streamName { - if (streamName == kLandmarksOutputStream) { - if (packet.IsEmpty()) { - NSLog(@"[TS:%lld] No hand landmarks", packet.Timestamp().Value()); - return; - } - const auto& multi_hand_landmarks = packet.Get>(); - NSLog(@"[TS:%lld] Number of hand instances with landmarks: %lu", packet.Timestamp().Value(), - multi_hand_landmarks.size()); - for (int hand_index = 0; hand_index < multi_hand_landmarks.size(); ++hand_index) { - const auto& landmarks = multi_hand_landmarks[hand_index]; - NSLog(@"\tNumber of landmarks for hand[%d]: %d", hand_index, landmarks.landmark_size()); - for (int i = 0; i < landmarks.landmark_size(); ++i) { - NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), - landmarks.landmark(i).y(), landmarks.landmark(i).z()); - } - } - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. -- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/main.m b/mediapipe/examples/ios/multihandtrackinggpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/multihandtrackinggpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/objectdetectioncpu/AppDelegate.h b/mediapipe/examples/ios/objectdetectioncpu/AppDelegate.h deleted file mode 100644 index 6b0377ef2..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/AppDelegate.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import - -@interface AppDelegate : UIResponder - -@property(strong, nonatomic) UIWindow *window; - -@end diff --git a/mediapipe/examples/ios/objectdetectioncpu/AppDelegate.m b/mediapipe/examples/ios/objectdetectioncpu/AppDelegate.m deleted file mode 100644 index 9e1b7ff0e..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/AppDelegate.m +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "AppDelegate.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. 
-} - -@end diff --git a/mediapipe/examples/ios/objectdetectioncpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/objectdetectioncpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/objectdetectioncpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/objectdetectioncpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/objectdetectioncpu/BUILD b/mediapipe/examples/ios/objectdetectioncpu/BUILD index 91a5a9331..3a18ef89a 100644 --- a/mediapipe/examples/ios/objectdetectioncpu/BUILD +++ b/mediapipe/examples/ios/objectdetectioncpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "ObjectDetectionCpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".ObjectDetectionCpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -49,32 +53,13 @@ ios_application( objc_library( name = "ObjectDetectionCpuAppLibrary", - srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", - ], - hdrs = [ - "AppDelegate.h", - "ViewController.h", - ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/object_detection:mobile_cpu_binary_graph", "//mediapipe/models:ssdlite_object_detection.tflite", "//mediapipe/models:ssdlite_object_detection_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ 
"//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/objectdetectioncpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/objectdetectioncpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/objectdetectioncpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/objectdetectioncpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/objectdetectioncpu/Info.plist b/mediapipe/examples/ios/objectdetectioncpu/Info.plist index 30db14c62..e420121c8 100644 --- a/mediapipe/examples/ios/objectdetectioncpu/Info.plist +++ b/mediapipe/examples/ios/objectdetectioncpu/Info.plist @@ -2,41 +2,13 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - + CameraPosition + back + GraphOutputStream + output_video + GraphInputStream + input_video + GraphName + mobile_cpu diff --git a/mediapipe/examples/ios/objectdetectioncpu/ViewController.h b/mediapipe/examples/ios/objectdetectioncpu/ViewController.h deleted file mode 100644 index e0a5a6367..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/ViewController.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface ViewController : UIViewController - -@end diff --git a/mediapipe/examples/ios/objectdetectioncpu/ViewController.mm b/mediapipe/examples/ios/objectdetectioncpu/ViewController.mm deleted file mode 100644 index 7118a1fac..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/ViewController.mm +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import "ViewController.h" - -#import "mediapipe/objc/MPPGraph.h" -#import "mediapipe/objc/MPPCameraInputSource.h" -#import "mediapipe/objc/MPPLayerRenderer.h" - -static NSString* const kGraphName = @"mobile_cpu"; - -static const char* kInputStream = "input_video"; -static const char* kOutputStream = "output_video"; -static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; - -@interface ViewController () - -// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and -// sent video frames on _videoQueue. -@property(nonatomic) MPPGraph* mediapipeGraph; - -@end - -@implementation ViewController { - /// Handles camera access via AVCaptureSession library. - MPPCameraInputSource* _cameraSource; - - /// Inform the user when camera is unavailable. - IBOutlet UILabel* _noCameraLabel; - /// Display the camera preview frames. - IBOutlet UIView* _liveView; - /// Render frames in a layer. - MPPLayerRenderer* _renderer; - - /// Process camera frames on this queue. - dispatch_queue_t _videoQueue; -} - -#pragma mark - Cleanup methods - -- (void)dealloc { - self.mediapipeGraph.delegate = nil; - [self.mediapipeGraph cancel]; - // Ignore errors since we're cleaning up. - [self.mediapipeGraph closeAllInputStreamsWithError:nil]; - [self.mediapipeGraph waitUntilDoneWithError:nil]; -} - -#pragma mark - MediaPipe graph methods - -+ (MPPGraph*)loadGraphFromResource:(NSString*)resource { - // Load the graph config resource. - NSError* configLoadError = nil; - NSBundle* bundle = [NSBundle bundleForClass:[self class]]; - if (!resource || resource.length == 0) { - return nil; - } - NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; - NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; - if (!data) { - NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); - return nil; - } - - // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. - mediapipe::CalculatorGraphConfig config; - config.ParseFromArray(data.bytes, data.length); - - // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. 
- MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; - [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; - return newGraph; -} - -#pragma mark - UIViewController methods - -- (void)viewDidLoad { - [super viewDidLoad]; - - _renderer = [[MPPLayerRenderer alloc] init]; - _renderer.layer.frame = _liveView.layer.bounds; - [_liveView.layer addSublayer:_renderer.layer]; - _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; - - dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( - DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); - _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); - - _cameraSource = [[MPPCameraInputSource alloc] init]; - [_cameraSource setDelegate:self queue:_videoQueue]; - _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; - _cameraSource.cameraPosition = AVCaptureDevicePositionBack; - // The frame's native format is rotated with respect to the portrait orientation. - _cameraSource.orientation = AVCaptureVideoOrientationPortrait; - - self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; - self.mediapipeGraph.delegate = self; - // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. - self.mediapipeGraph.maxFramesInFlight = 2; -} - -// In this application, there is only one ViewController which has no navigation to other view -// controllers, and there is only one View with live display showing the result of running the -// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph -// setup/teardown and camera start/stop logic should be updated appropriately in response to the -// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times -// depending on the application navigation flow in that case. -- (void)viewWillAppear:(BOOL)animated { - [super viewWillAppear:animated]; - - [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { - if (granted) { - [self startGraphAndCamera]; - dispatch_async(dispatch_get_main_queue(), ^{ - _noCameraLabel.hidden = YES; - }); - } - }]; -} - -- (void)startGraphAndCamera { - // Start running self.mediapipeGraph. - NSError* error; - if (![self.mediapipeGraph startWithError:&error]) { - NSLog(@"Failed to start graph: %@", error); - } - - // Start fetching frames from the camera. - dispatch_async(_videoQueue, ^{ - [_cameraSource start]; - }); -} - -#pragma mark - MPPGraphDelegate methods - -// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. -- (void)mediapipeGraph:(MPPGraph*)graph - didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer - fromStream:(const std::string&)streamName { - if (streamName == kOutputStream) { - // Display the captured image on the screen. - CVPixelBufferRetain(pixelBuffer); - dispatch_async(dispatch_get_main_queue(), ^{ - [_renderer renderPixelBuffer:pixelBuffer]; - CVPixelBufferRelease(pixelBuffer); - }); - } -} - -#pragma mark - MPPInputSourceDelegate methods - -// Must be invoked on _videoQueue. 
-- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer - timestamp:(CMTime)timestamp - fromSource:(MPPInputSource*)source { - if (source != _cameraSource) { - NSLog(@"Unknown source: %@", source); - return; - } - [self.mediapipeGraph sendPixelBuffer:imageBuffer - intoStream:kInputStream - packetType:MPPPacketTypePixelBuffer]; -} - -@end diff --git a/mediapipe/examples/ios/objectdetectioncpu/main.m b/mediapipe/examples/ios/objectdetectioncpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/objectdetectioncpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/objectdetectiongpu/AppDelegate.h b/mediapipe/examples/ios/objectdetectiongpu/AppDelegate.h deleted file mode 100644 index 6b0377ef2..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/AppDelegate.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -@interface AppDelegate : UIResponder - -@property(strong, nonatomic) UIWindow *window; - -@end diff --git a/mediapipe/examples/ios/objectdetectiongpu/AppDelegate.m b/mediapipe/examples/ios/objectdetectiongpu/AppDelegate.m deleted file mode 100644 index cee668142..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/AppDelegate.m +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#import "AppDelegate.h" -#import "ViewController.h" - -@interface AppDelegate () - -@end - -@implementation AppDelegate - -- (BOOL)application:(UIApplication *)application - didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - ViewController *viewController = (ViewController *)self.window.rootViewController; - NSURL *url = [launchOptions objectForKey:UIApplicationLaunchOptionsURLKey]; - // Unattended testing on Firebase is enabled by custom URL schema. - if ([url.scheme isEqualToString:@"firebase-game-loop"]) { - [viewController setSourceMode:MediaPipeDemoSourceVideo]; - } else { - [viewController setSourceMode:MediaPipeDemoSourceBackCamera]; - } - return YES; -} - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for - // certain types of temporary interruptions (such as an incoming phone call or SMS message) or - // when the user quits the application and it begins the transition to the background state. Use - // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering - // callbacks. Games should use this method to pause the game. -} - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store - // enough application state information to restore your application to its current state in case - // it is terminated later. If your application supports background execution, this method is - // called instead of applicationWillTerminate: when the user quits. -} - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo - // many of the changes made on entering the background. -} - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If - // the application was previously in the background, optionally refresh the user interface. -} - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also - // applicationDidEnterBackground:. 
-} - -@end diff --git a/mediapipe/examples/ios/objectdetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/objectdetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index a1895a242..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,99 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "1x" - }, - { - "idiom" : "ipad", - "size" : "76x76", - "scale" : "2x" - }, - { - "idiom" : "ipad", - "size" : "83.5x83.5", - "scale" : "2x" - }, - { - "idiom" : "ios-marketing", - "size" : "1024x1024", - "scale" : "1x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/objectdetectiongpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/objectdetectiongpu/Assets.xcassets/Contents.json deleted file mode 100644 index 7afcdfaf8..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/Assets.xcassets/Contents.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "info" : { - "version" : 1, - "author" : "xcode" - } -} - diff --git a/mediapipe/examples/ios/objectdetectiongpu/BUILD b/mediapipe/examples/ios/objectdetectiongpu/BUILD index 19715532e..b31c13f53 100644 --- a/mediapipe/examples/ios/objectdetectiongpu/BUILD +++ b/mediapipe/examples/ios/objectdetectiongpu/BUILD @@ -33,12 +33,16 @@ alias( ios_application( name = "ObjectDetectionGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], bundle_id = BUNDLE_ID_PREFIX + ".ObjectDetectionGpu", families = [ "iphone", "ipad", ], - infoplists = ["Info.plist"], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], minimum_os_version = MIN_IOS_VERSION, provisioning_profile = example_provisioning(), deps = [ @@ -49,32 +53,13 @@ ios_application( objc_library( name = "ObjectDetectionGpuAppLibrary", - srcs = [ - "AppDelegate.m", - "ViewController.mm", - "main.m", - ], - hdrs = [ - "AppDelegate.h", - "ViewController.h", - ], data = [ - "Base.lproj/LaunchScreen.storyboard", - "Base.lproj/Main.storyboard", "//mediapipe/graphs/object_detection:mobile_gpu_binary_graph", "//mediapipe/models:ssdlite_object_detection.tflite", "//mediapipe/models:ssdlite_object_detection_labelmap.txt", ], - sdk_frameworks = [ - "AVFoundation", - "CoreGraphics", - "CoreMedia", - "UIKit", - ], deps = [ - "//mediapipe/objc:mediapipe_framework_ios", - "//mediapipe/objc:mediapipe_input_sources_ios", - "//mediapipe/objc:mediapipe_layer_renderer", + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", ] + select({ 
"//mediapipe:ios_i386": [], "//mediapipe:ios_x86_64": [], diff --git a/mediapipe/examples/ios/objectdetectiongpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/objectdetectiongpu/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index bfa361294..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/objectdetectiongpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/objectdetectiongpu/Base.lproj/Main.storyboard deleted file mode 100644 index 20845c12f..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/Base.lproj/Main.storyboard +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/mediapipe/examples/ios/objectdetectiongpu/Info.plist b/mediapipe/examples/ios/objectdetectiongpu/Info.plist index 3a193f784..53930fe4a 100644 --- a/mediapipe/examples/ios/objectdetectiongpu/Info.plist +++ b/mediapipe/examples/ios/objectdetectiongpu/Info.plist @@ -2,54 +2,15 @@ - NSCameraUsageDescription - This app uses the camera to demonstrate live video processing. - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - - UISupportedInterfaceOrientations~ipad - - UIInterfaceOrientationPortrait - - CFBundleURLTypes - - - CFBundleURLName - com.google.firebase - CFBundleTypeRole - Editor - CFBundleURLSchemes - - firebase-game-loop - - - + CameraPosition + back + GraphName + mobile_gpu + GraphOutputStream + output_video + GraphInputStream + input_video + VideoName + object_detection diff --git a/mediapipe/examples/ios/objectdetectiongpu/ViewController.h b/mediapipe/examples/ios/objectdetectiongpu/ViewController.h deleted file mode 100644 index c768fa0d9..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/ViewController.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import - -typedef NS_ENUM(NSInteger, MediaPipeDemoSourceMode) { - MediaPipeDemoSourceBackCamera, - MediaPipeDemoSourceVideo -}; - -@interface ViewController : UIViewController -- (void)setSourceMode:(MediaPipeDemoSourceMode)mode; -@end diff --git a/mediapipe/examples/ios/objectdetectiongpu/main.m b/mediapipe/examples/ios/objectdetectiongpu/main.m deleted file mode 100644 index 7ffe5ea5d..000000000 --- a/mediapipe/examples/ios/objectdetectiongpu/main.m +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#import -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD b/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD new file mode 100644 index 000000000..5b9a08be2 --- /dev/null +++ b/mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD @@ -0,0 +1,77 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "@build_bazel_rules_apple//apple:ios.bzl", + "ios_application", +) +load( + "//mediapipe/examples/ios:bundle_id.bzl", + "BUNDLE_ID_PREFIX", + "example_provisioning", +) + +licenses(["notice"]) # Apache 2.0 + +MIN_IOS_VERSION = "10.0" + +alias( + name = "upperbodyposetrackinggpu", + actual = "UpperBodyPoseTrackingGpuApp", +) + +ios_application( + name = "UpperBodyPoseTrackingGpuApp", + app_icons = ["//mediapipe/examples/ios/common:AppIcon"], + bundle_id = BUNDLE_ID_PREFIX + ".UpperBodyPoseTrackingGpu", + families = [ + "iphone", + "ipad", + ], + infoplists = [ + "//mediapipe/examples/ios/common:Info.plist", + "Info.plist", + ], + minimum_os_version = MIN_IOS_VERSION, + provisioning_profile = example_provisioning(), + deps = [ + ":UpperBodyPoseTrackingGpuAppLibrary", + "@ios_opencv//:OpencvFramework", + ], +) + +objc_library( + name = "UpperBodyPoseTrackingGpuAppLibrary", + srcs = [ + "UpperBodyPoseTrackingViewController.mm", + ], + hdrs = [ + "UpperBodyPoseTrackingViewController.h", + ], + data = [ + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_gpu.binarypb", + "//mediapipe/modules/pose_detection:pose_detection.tflite", + "//mediapipe/modules/pose_landmark:pose_landmark_upper_body.tflite", + ], + deps = [ + "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", + ] + select({ + "//mediapipe:ios_i386": [], + "//mediapipe:ios_x86_64": [], + "//conditions:default": [ + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_gpu_deps", + "//mediapipe/framework/formats:landmark_cc_proto", + ], + }), +) diff --git a/mediapipe/examples/ios/upperbodyposetrackinggpu/Info.plist b/mediapipe/examples/ios/upperbodyposetrackinggpu/Info.plist new file mode 100644 index 000000000..ec4b768d8 --- /dev/null +++ b/mediapipe/examples/ios/upperbodyposetrackinggpu/Info.plist @@ -0,0 +1,16 @@ + + + + + CameraPosition + back + MainViewController + UpperBodyPoseTrackingViewController + GraphOutputStream 
+ output_video + GraphInputStream + input_video + GraphName + upper_body_pose_tracking_gpu + + diff --git a/mediapipe/examples/ios/upperbodyposetrackinggpu/UpperBodyPoseTrackingViewController.h b/mediapipe/examples/ios/upperbodyposetrackinggpu/UpperBodyPoseTrackingViewController.h new file mode 100644 index 000000000..e9dea0d1d --- /dev/null +++ b/mediapipe/examples/ios/upperbodyposetrackinggpu/UpperBodyPoseTrackingViewController.h @@ -0,0 +1,21 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/examples/ios/common/CommonViewController.h" + +@interface UpperBodyPoseTrackingViewController : CommonViewController + +@end diff --git a/mediapipe/examples/ios/upperbodyposetrackinggpu/UpperBodyPoseTrackingViewController.mm b/mediapipe/examples/ios/upperbodyposetrackinggpu/UpperBodyPoseTrackingViewController.mm new file mode 100644 index 000000000..00a14bfb7 --- /dev/null +++ b/mediapipe/examples/ios/upperbodyposetrackinggpu/UpperBodyPoseTrackingViewController.mm @@ -0,0 +1,53 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "UpperBodyPoseTrackingViewController.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +static const char* kLandmarksOutputStream = "pose_landmarks"; + +@implementation UpperBodyPoseTrackingViewController + +#pragma mark - UIViewController methods + +- (void)viewDidLoad { + [super viewDidLoad]; + + [self.mediapipeGraph addFrameOutputStream:kLandmarksOutputStream + outputPacketType:MPPPacketTypeRaw]; +} + +#pragma mark - MPPGraphDelegate methods + +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. 
+- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPacket:(const ::mediapipe::Packet&)packet + fromStream:(const std::string&)streamName { + if (streamName == kLandmarksOutputStream) { + if (packet.IsEmpty()) { + NSLog(@"[TS:%lld] No pose landmarks", packet.Timestamp().Value()); + return; + } + const auto& landmarks = packet.Get<::mediapipe::NormalizedLandmarkList>(); + NSLog(@"[TS:%lld] Number of pose landmarks: %d", packet.Timestamp().Value(), + landmarks.landmark_size()); + for (int i = 0; i < landmarks.landmark_size(); ++i) { + NSLog(@"\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), + landmarks.landmark(i).y(), landmarks.landmark(i).z()); + } + } +} + +@end diff --git a/mediapipe/examples/python/__init__.py b/mediapipe/examples/python/__init__.py new file mode 100644 index 000000000..5d9133833 --- /dev/null +++ b/mediapipe/examples/python/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MediaPipe Python Examples.""" + +from mediapipe.examples.python.upper_body_pose_tracker import UpperBodyPoseTracker diff --git a/mediapipe/examples/python/upper_body_pose_tracker.py b/mediapipe/examples/python/upper_body_pose_tracker.py new file mode 100644 index 000000000..b7a195994 --- /dev/null +++ b/mediapipe/examples/python/upper_body_pose_tracker.py @@ -0,0 +1,205 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""MediaPipe upper body pose tracker. + +MediaPipe upper body pose tracker takes an RGB image as the input and returns +a pose landmark list and an annotated RGB image represented as a numpy ndarray. + +Usage examples: + pose_tracker = UpperBodyPoseTracker() + + pose_landmarks, _ = pose_tracker.run( + input_file='/tmp/input.png', + output_file='/tmp/output.png') + + input_image = cv2.imread('/tmp/input.png')[:, :, ::-1] + pose_landmarks, annotated_image = pose_tracker.run(input_image) + + pose_tracker.run_live() + + pose_tracker.close() +""" + +import os +import time +from typing import Tuple, Union + +import cv2 +import numpy as np +import mediapipe.python as mp +# resources dependency +from mediapipe.framework.formats import landmark_pb2 + +# Input and output stream names. +INPUT_VIDEO = 'input_video' +OUTPUT_VIDEO = 'output_video' +POSE_LANDMARKS = 'pose_landmarks' + + +class UpperBodyPoseTracker: + """MediaPipe upper body pose tracker.""" + + def __init__(self): + """The init method of MediaPipe upper body pose tracker. 
+ + The method reads the upper body pose tracking cpu binary graph and + initializes a CalculatorGraph from it. The output packets of pose_landmarks + and output_video output streams will be observed by callbacks. The graph + will be started at the end of this method, waiting for input packets. + """ + # MediaPipe package root path + root_path = os.sep.join( os.path.abspath(__file__).split(os.sep)[:-4]) + mp.resource_util.set_resource_dir(root_path) + + self._graph = mp.CalculatorGraph( + binary_graph_path=os.path.join( + root_path, + 'mediapipe/graphs/pose_tracking/upper_body_pose_tracking_cpu.binarypb' + )) + self._outputs = {} + for stream_name in [POSE_LANDMARKS, OUTPUT_VIDEO]: + self._graph.observe_output_stream(stream_name, self._assign_packet) + self._graph.start_run() + + def run( + self, + input_frame: np.ndarray = None, + *, + input_file: str = None, + output_file: str = None + ) -> Tuple[Union[None, landmark_pb2.NormalizedLandmarkList], np.ndarray]: + """The run method of MediaPipe upper body pose tracker. + + MediaPipe upper body pose tracker takes either the path to an image file or + an RGB image represented as a numpy ndarray and it returns the pose + landmarks list and the annotated RGB image represented as a numpy ndarray. + + Args: + input_frame: An RGB image represented as a numpy ndarray. + input_file: The path to an image file. + output_file: The file path that the annotated image will be saved into. + + Returns: + pose_landmarks: The pose landmarks list. + annotated_image: The image with pose landmarks annotations. + + Raises: + RuntimeError: If the input frame doesn't contain 3 channels (RGB format) + or the input arg is not correctly provided. + + Examples + pose_tracker = UpperBodyPoseTracker() + pose_landmarks, _ = pose_tracker.run( + input_file='/tmp/input.png', + output_file='/tmp/output.png') + + # Read an image and convert the BGR image to RGB. + input_image = cv2.imread('/tmp/input.png')[:, :, ::-1] + pose_landmarks, annotated_image = pose_tracker.run(input_image) + pose_tracker.close() + """ + if input_file is None and input_frame is None: + raise RuntimeError( + 'Must provide either a path to an image file or an RGB image represented as a numpy.ndarray.' + ) + + if input_file: + if input_frame is not None: + raise RuntimeError( + 'Must only provide either \'input_file\' or \'input_frame\'.') + else: + input_frame = cv2.imread(input_file)[:, :, ::-1] + + pose_landmarks, annotated_image = self._run_graph(input_frame) + if output_file: + cv2.imwrite(output_file, annotated_image[:, :, ::-1]) + return pose_landmarks, annotated_image + + def run_live(self) -> None: + """Run MediaPipe upper body pose tracker with live camera input. + + The method will be self-terminated after 30 seconds. If you need to + terminate it earlier, press the Esc key to stop the run manually. Note that + you need to select the output image window rather than the terminal window + first and then press the key. 
+ + Examples: + pose_tracker = UpperBodyPoseTracker() + pose_tracker.run_live() + pose_tracker.close() + """ + cap = cv2.VideoCapture(0) + start_time = time.time() + print( + 'Press Esc within the output image window to stop the run, or let it ' + 'self terminate after 30 seconds.') + while cap.isOpened() and time.time() - start_time < 30: + success, input_frame = cap.read() + if not success: + break + _, output_frame = self._run_graph(input_frame[:, :, ::-1]) + cv2.imshow('MediaPipe upper body pose tracker', output_frame[:, :, ::-1]) + if cv2.waitKey(5) & 0xFF == 27: + break + cap.release() + cv2.destroyAllWindows() + + def close(self) -> None: + self._graph.close() + self._graph = None + self._outputs = None + + def _run_graph( + self, + input_frame: np.ndarray = None, + ) -> Tuple[Union[None, landmark_pb2.NormalizedLandmarkList], np.ndarray]: + """The internal run graph method. + + Args: + input_frame: An RGB image represented as a numpy ndarray. + + Returns: + pose_landmarks: The pose landmarks list. + annotated_image: The image with pose landmarks annotations. + + Raises: + RuntimeError: If the input frame doesn't contain 3 channels representing + RGB. + """ + + if input_frame.shape[2] != 3: + raise RuntimeError('input frame must have 3 channels.') + + self._outputs.clear() + start_time = time.time() + self._graph.add_packet_to_input_stream( + stream=INPUT_VIDEO, + packet=mp.packet_creator.create_image_frame( + image_format=mp.ImageFormat.SRGB, data=input_frame), + timestamp=mp.Timestamp.from_seconds(start_time)) + self._graph.wait_until_idle() + + pose_landmarks = None + if POSE_LANDMARKS in self._outputs: + pose_landmarks = mp.packet_getter.get_proto(self._outputs[POSE_LANDMARKS]) + annotated_image = mp.packet_getter.get_image_frame( + self._outputs[OUTPUT_VIDEO]).numpy_view() + print('UpperBodyPoseTracker.Run() took', + time.time() - start_time, 'seconds') + return pose_landmarks, annotated_image + + def _assign_packet(self, stream_name: str, packet: mp.Packet) -> None: + self._outputs[stream_name] = packet diff --git a/mediapipe/framework/calculator.proto b/mediapipe/framework/calculator.proto index 5afa7433c..503c7d559 100644 --- a/mediapipe/framework/calculator.proto +++ b/mediapipe/framework/calculator.proto @@ -194,9 +194,8 @@ message ProfilerConfig { // to be appended to the TraceBuffer. int64 trace_log_margin_usec = 12; - // False specifies an event for each calculator invocation. - // True specifies a separate event for each start and finish time. - bool trace_log_duration_events = 13; + // Deprecated, replaced by trace_log_instant_events. + bool trace_log_duration_events = 13 [deprecated = true]; // The number of trace log intervals per file. The total log duration is: // trace_log_interval_usec * trace_log_file_count * trace_log_interval_count. @@ -209,6 +208,10 @@ message ProfilerConfig { // If true, tracer timing events are recorded and reported. bool trace_enabled = 16; + + // False specifies an event for each calculator invocation. + // True specifies a separate event for each start and finish time. + bool trace_log_instant_events = 17; } // Describes the topology and function of a MediaPipe Graph. 
The graph of diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc index 8f1d2faa0..f44931b32 100644 --- a/mediapipe/framework/calculator_graph_bounds_test.cc +++ b/mediapipe/framework/calculator_graph_bounds_test.cc @@ -1375,5 +1375,132 @@ TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Passthrough) { MP_ASSERT_OK(graph.WaitUntilDone()); } +// A Calculator that sends a timestamp bound for every other input. +class OccasionalBoundCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).Set(); + cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + absl::SleepFor(absl::Milliseconds(1)); + if (cc->InputTimestamp().Value() % 20 == 0) { + Timestamp bound = cc->InputTimestamp().NextAllowedInStream(); + cc->Outputs().Index(0).SetNextTimestampBound( + std::max(bound, cc->Outputs().Index(0).NextTimestampBound())); + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(OccasionalBoundCalculator); + +// This test fails without the fix in CL/324708313, because +// PropagateUpdatesToMirrors is called with decreasing next_timestamp_bound, +// because each parallel thread in-flight computes next_timestamp_bound using +// a separate OutputStreamShard::NextTimestampBound. +TEST(CalculatorGraphBoundsTest, MaxInFlightWithOccasionalBound) { + // OccasionalCalculator runs on parallel threads and sends ts occasionally. + std::string config_str = R"( + input_stream: "input_0" + node { + calculator: "OccasionalBoundCalculator" + input_stream: "input_0" + output_stream: "output_0" + max_in_flight: 5 + } + num_threads: 4 + )"; + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(config_str); + CalculatorGraph graph; + std::vector output_0_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("output_0", [&](const Packet& p) { + output_0_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Send in packets. + for (int i = 0; i < 9; ++i) { + const int ts = 10 + i * 10; + Packet p = MakePacket(i).At(Timestamp(ts)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p)); + } + + // Only bounds arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(output_0_packets.size(), 0); + + // Shutdown the graph. + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +// A Calculator that uses both SetTimestampOffset and SetNextTimestampBound. +class OffsetAndBoundCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).Set(); + cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Open(CalculatorContext* cc) final { + cc->SetOffset(0); + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Process(CalculatorContext* cc) final { + if (cc->InputTimestamp().Value() % 20 == 0) { + cc->Outputs().Index(0).SetNextTimestampBound(Timestamp(10000)); + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(OffsetAndBoundCalculator); + +// This test shows that the bound defined by SetOffset is ignored +// if it is superseded by SetNextTimestampBound. 
+TEST(CalculatorGraphBoundsTest, OffsetAndBound) { + // OffsetAndBoundCalculator runs on parallel threads and sends ts + // occasionally. + std::string config_str = R"( + input_stream: "input_0" + node { + calculator: "OffsetAndBoundCalculator" + input_stream: "input_0" + output_stream: "output_0" + } + )"; + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(config_str); + CalculatorGraph graph; + std::vector output_0_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("output_0", [&](const Packet& p) { + output_0_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Send in packets. + for (int i = 0; i < 9; ++i) { + const int ts = 10 + i * 10; + Packet p = MakePacket(i).At(Timestamp(ts)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p)); + } + + // Only bounds arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(output_0_packets.size(), 0); + + // Shutdown the graph. + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/framework/output_stream_manager.cc b/mediapipe/framework/output_stream_manager.cc index deacf4fbb..d3d9c7a44 100644 --- a/mediapipe/framework/output_stream_manager.cc +++ b/mediapipe/framework/output_stream_manager.cc @@ -118,10 +118,9 @@ Timestamp OutputStreamManager::ComputeOutputTimestampBound( // Note that "MaxOutputTimestamp()" must consider both output packet // timetstamp and SetNextTimestampBound values. // See the timestamp mapping section in go/mediapipe-bounds for details. - Timestamp new_bound = output_stream_shard.NextTimestampBound(); + Timestamp input_bound; if (output_stream_spec_.offset_enabled && input_timestamp != Timestamp::Unstarted()) { - Timestamp input_bound; if (input_timestamp == Timestamp::PreStream()) { // Timestamp::PreStream() is a special value that we shouldn't apply any // offset. @@ -144,9 +143,16 @@ Timestamp OutputStreamManager::ComputeOutputTimestampBound( input_bound = input_timestamp.NextAllowedInStream() + output_stream_spec_.offset; } + } + Timestamp new_bound; + // The input_bound if it is greater than the shard bound. + if (input_bound > output_stream_shard.NextTimestampBound()) { new_bound = std::max(new_bound, input_bound); } - + // The bound defined by SetNextTimestampBound. + new_bound = + std::max(new_bound, output_stream_shard.updated_next_timestamp_bound_); + // The bound defined by added packets. 
if (!output_stream_shard.IsEmpty()) { new_bound = std::max( new_bound, @@ -160,8 +166,10 @@ void OutputStreamManager::PropagateUpdatesToMirrors( Timestamp next_timestamp_bound, OutputStreamShard* output_stream_shard) { CHECK(output_stream_shard); { - absl::MutexLock lock(&stream_mutex_); - next_timestamp_bound_ = next_timestamp_bound; + if (next_timestamp_bound != Timestamp::Unset()) { + absl::MutexLock lock(&stream_mutex_); + next_timestamp_bound_ = next_timestamp_bound; + } } std::list* packets_to_propagate = output_stream_shard->OutputQueue(); VLOG(3) << "Output stream: " << Name() @@ -170,9 +178,10 @@ void OutputStreamManager::PropagateUpdatesToMirrors( << " next timestamp: " << next_timestamp_bound; bool add_packets = !packets_to_propagate->empty(); bool set_bound = - !add_packets || - packets_to_propagate->back().Timestamp().NextAllowedInStream() != - next_timestamp_bound; + (next_timestamp_bound != Timestamp::Unset()) && + (!add_packets || + packets_to_propagate->back().Timestamp().NextAllowedInStream() != + next_timestamp_bound); int mirror_count = mirrors_.size(); for (int idx = 0; idx < mirror_count; ++idx) { const Mirror& mirror = mirrors_[idx]; diff --git a/mediapipe/framework/output_stream_manager_test.cc b/mediapipe/framework/output_stream_manager_test.cc index e0f47f078..4428790c6 100644 --- a/mediapipe/framework/output_stream_manager_test.cc +++ b/mediapipe/framework/output_stream_manager_test.cc @@ -123,14 +123,12 @@ TEST_F(OutputStreamManagerTest, Init) {} TEST_F(OutputStreamManagerTest, ComputeOutputTimestampBoundWithoutOffset) { Timestamp input_timestamp = Timestamp(0); - Timestamp stream_previous_timestamp_bound = - output_stream_manager_->NextTimestampBound(); Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); // If the offset isn't enabled, the input timestamp and the output bound are // not related. Since the output stream shard is empty, the output bound is - // still equal to the previous timestamp bound of the stream. - EXPECT_EQ(stream_previous_timestamp_bound, output_bound); + // still Timestamp::Unset(). + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_shard_.AddPacket( MakePacket("Packet 1").At(Timestamp(10))); @@ -253,11 +251,10 @@ TEST_F(OutputStreamManagerTest, ComputeOutputTimestampBoundWithNegativeOffset) { TEST_F(OutputStreamManagerTest, ComputeOutputTimestampBoundWithoutOffsetAfterOpenNode) { Timestamp input_timestamp = Timestamp::Unstarted(); - // If the OutputStreamShard is empty, the output_bound is - // Timestamp::PreStream(). + // If the OutputStreamShard is empty, the output_bound is Timestamp::Unset(). Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - EXPECT_EQ(Timestamp::PreStream(), output_bound); + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_shard_.AddPacket( MakePacket("Packet 1").At(Timestamp(20))); @@ -277,10 +274,10 @@ TEST_F(OutputStreamManagerTest, output_stream_shard_.SetOffset(0); Timestamp input_timestamp = Timestamp::Unstarted(); // If the OutputStreamShard is empty, the output_bound is always - // Timestamp::PreStream() regardless of the offset. + // Timestamp::Unset() regardless of the offset. 
Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - EXPECT_EQ(Timestamp::PreStream(), output_bound); + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_shard_.AddPacket( MakePacket("Packet 1").At(Timestamp(20))); @@ -298,13 +295,11 @@ TEST_F(OutputStreamManagerTest, TEST_F(OutputStreamManagerTest, ComputeOutputTimestampBoundWithoutOffsetForPreStream) { Timestamp input_timestamp = Timestamp::PreStream(); - Timestamp stream_previous_timestamp_bound = - output_stream_manager_->NextTimestampBound(); // If the OutputStreamShard is empty, the output bound is equal to the - // previous timestamp bound of the stream. + // Timestamp::Unset(). Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - EXPECT_EQ(stream_previous_timestamp_bound, output_bound); + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_shard_.AddPacket( MakePacket("Packet 1").At(Timestamp(20))); @@ -370,13 +365,10 @@ TEST_F(OutputStreamManagerTest, AddPacketUnset) { ASSERT_EQ(1, errors_.size()); EXPECT_TRUE(output_stream_shard_.IsEmpty()); - Timestamp stream_previous_timestamp_bound = - output_stream_manager_->NextTimestampBound(); Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - // The output bound is still equal to the previous timestamp bound of the - // stream. - EXPECT_EQ(stream_previous_timestamp_bound, output_bound); + // The output bound is still equal to Timestamp::Unset(). + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_manager_->PropagateUpdatesToMirrors(output_bound, &output_stream_shard_); @@ -394,13 +386,10 @@ TEST_F(OutputStreamManagerTest, AddPacketUnstarted) { ASSERT_EQ(1, errors_.size()); EXPECT_TRUE(output_stream_shard_.IsEmpty()); - Timestamp stream_previous_timestamp_bound = - output_stream_manager_->NextTimestampBound(); Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - // The output bound is still equal to the previous timestamp bound of the - // stream. - EXPECT_EQ(stream_previous_timestamp_bound, output_bound); + // The output bound is still equal to Timestamp::Unset(). + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_manager_->PropagateUpdatesToMirrors(output_bound, &output_stream_shard_); @@ -419,13 +408,10 @@ TEST_F(OutputStreamManagerTest, AddPacketOneOverPostStream) { ASSERT_EQ(1, errors_.size()); EXPECT_TRUE(output_stream_shard_.IsEmpty()); - Timestamp stream_previous_timestamp_bound = - output_stream_manager_->NextTimestampBound(); Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - // The output bound is still equal to the previous timestamp bound of the - // stream. - EXPECT_EQ(stream_previous_timestamp_bound, output_bound); + // The output bound is still equal to Timestamp::Unset(). 
+ EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_manager_->PropagateUpdatesToMirrors(output_bound, &output_stream_shard_); @@ -444,13 +430,10 @@ TEST_F(OutputStreamManagerTest, AddPacketdDone) { ASSERT_EQ(1, errors_.size()); EXPECT_TRUE(output_stream_shard_.IsEmpty()); - Timestamp stream_previous_timestamp_bound = - output_stream_manager_->NextTimestampBound(); Timestamp output_bound = output_stream_manager_->ComputeOutputTimestampBound( output_stream_shard_, input_timestamp); - // The output bound is still equal to the previous timestamp bound of the - // stream. - EXPECT_EQ(stream_previous_timestamp_bound, output_bound); + // The output bound is still equal to Timestamp::Unset(). + EXPECT_EQ(Timestamp::Unset(), output_bound); output_stream_manager_->PropagateUpdatesToMirrors(output_bound, &output_stream_shard_); diff --git a/mediapipe/framework/output_stream_shard.cc b/mediapipe/framework/output_stream_shard.cc index 4105c1b89..825dc67fa 100644 --- a/mediapipe/framework/output_stream_shard.cc +++ b/mediapipe/framework/output_stream_shard.cc @@ -40,11 +40,13 @@ void OutputStreamShard::SetNextTimestampBound(Timestamp bound) { return; } next_timestamp_bound_ = bound; + updated_next_timestamp_bound_ = next_timestamp_bound_; } void OutputStreamShard::Close() { closed_ = true; next_timestamp_bound_ = Timestamp::Done(); + updated_next_timestamp_bound_ = next_timestamp_bound_; } bool OutputStreamShard::IsClosed() const { return closed_; } @@ -122,6 +124,7 @@ Status OutputStreamShard::AddPacketInternal(T&& packet) { // Otherwise, moves the packet into output_queue_. output_queue_.push_back(std::forward(packet)); next_timestamp_bound_ = timestamp.NextAllowedInStream(); + updated_next_timestamp_bound_ = next_timestamp_bound_; // TODO debug log? @@ -152,6 +155,7 @@ Timestamp OutputStreamShard::LastAddedPacketTimestamp() const { void OutputStreamShard::Reset(Timestamp next_timestamp_bound, bool close) { output_queue_.clear(); next_timestamp_bound_ = next_timestamp_bound; + updated_next_timestamp_bound_ = Timestamp::Unset(); closed_ = close; } diff --git a/mediapipe/framework/output_stream_shard.h b/mediapipe/framework/output_stream_shard.h index afa6055f5..009fc2f3c 100644 --- a/mediapipe/framework/output_stream_shard.h +++ b/mediapipe/framework/output_stream_shard.h @@ -117,6 +117,11 @@ class OutputStreamShard : public OutputStream { std::list output_queue_; bool closed_; Timestamp next_timestamp_bound_; + // Equal to next_timestamp_bound_ only if the bound has been explicitly set + // by the calculator. This is needed for parallel Process() calls, + // in order to avoid propagating the initial next_timestamp_bound_, which + // does not reflect the output of Process() for preceding timestamps. + Timestamp updated_next_timestamp_bound_; // Accesses OutputStreamShard for profiling. 
friend class GraphProfiler; diff --git a/mediapipe/framework/profiler/graph_profiler.cc b/mediapipe/framework/profiler/graph_profiler.cc index 3a5b1ce7a..45367efed 100644 --- a/mediapipe/framework/profiler/graph_profiler.cc +++ b/mediapipe/framework/profiler/graph_profiler.cc @@ -596,7 +596,7 @@ void AssignNodeNames(GraphProfile* profile) { absl::Microseconds(profiler_config_.trace_log_margin_usec()); GraphProfile profile; GraphTrace* trace = profile.add_graph_trace(); - if (!profiler_config_.trace_log_duration_events()) { + if (!profiler_config_.trace_log_instant_events()) { tracer()->GetTrace(previous_log_end_time_, end_time, trace); } else { tracer()->GetLog(previous_log_end_time_, end_time, trace); diff --git a/mediapipe/gpu/BUILD b/mediapipe/gpu/BUILD index b95c9e0c4..b92a75e63 100644 --- a/mediapipe/gpu/BUILD +++ b/mediapipe/gpu/BUILD @@ -432,6 +432,13 @@ cc_library( "gl_texture_buffer_pool.h", ], }), + copts = select({ + "//conditions:default": [], + "//mediapipe:apple": [ + "-x objective-c++", + "-fobjc-arc", + ], + }), visibility = ["//visibility:public"], deps = [ ":gl_base", diff --git a/mediapipe/gpu/gl_calculator_helper_impl_ios.mm b/mediapipe/gpu/gl_calculator_helper_impl_ios.mm index 00b2e643c..81cde6a9f 100644 --- a/mediapipe/gpu/gl_calculator_helper_impl_ios.mm +++ b/mediapipe/gpu/gl_calculator_helper_impl_ios.mm @@ -153,14 +153,28 @@ std::unique_ptr GlTexture::GetFrame() const { CVReturn err = CVPixelBufferLockBaseAddress(pixel_buffer, 0); NSCAssert(err == kCVReturnSuccess, @"CVPixelBufferLockBaseAddress failed: %d", err); OSType pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer); + size_t bytes_per_row = CVPixelBufferGetBytesPerRow(pixel_buffer); + uint8_t* pixel_ptr = static_cast(CVPixelBufferGetBaseAddress(pixel_buffer)); if (pixel_format == kCVPixelFormatType_32BGRA) { // TODO: restore previous framebuffer? Move this to helper so we can // use BindFramebuffer? glViewport(0, 0, width_, height_); glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target_, name_, 0); - glReadPixels(0, 0, width_, height_, GL_BGRA, GL_UNSIGNED_BYTE, - CVPixelBufferGetBaseAddress(pixel_buffer)); + + size_t contiguous_bytes_per_row = width_ * 4; + if (bytes_per_row == contiguous_bytes_per_row) { + glReadPixels(0, 0, width_, height_, GL_BGRA, GL_UNSIGNED_BYTE, pixel_ptr); + } else { + std::vector contiguous_buffer(contiguous_bytes_per_row * height_); + uint8_t* temp_ptr = contiguous_buffer.data(); + glReadPixels(0, 0, width_, height_, GL_BGRA, GL_UNSIGNED_BYTE, temp_ptr); + for (int i = 0; i < height_; ++i) { + memcpy(pixel_ptr, temp_ptr, contiguous_bytes_per_row); + temp_ptr += contiguous_bytes_per_row; + pixel_ptr += bytes_per_row; + } + } } else { uint32_t format_big = CFSwapInt32HostToBig(pixel_format); NSLog(@"unsupported pixel format: %.4s", (char*)&format_big); diff --git a/mediapipe/gpu/gpu_buffer_multi_pool.cc b/mediapipe/gpu/gpu_buffer_multi_pool.cc index e20b23ad8..c98a3de75 100644 --- a/mediapipe/gpu/gpu_buffer_multi_pool.cc +++ b/mediapipe/gpu/gpu_buffer_multi_pool.cc @@ -22,6 +22,7 @@ #include "mediapipe/gpu/gpu_shared_data_internal.h" #ifdef __APPLE__ +#include "CoreFoundation/CFBase.h" #include "mediapipe/objc/CFHolder.h" #endif // __APPLE__ @@ -31,29 +32,70 @@ namespace mediapipe { static constexpr int kKeepCount = 2; // The maximum size of the GpuBufferMultiPool. When the limit is reached, the // oldest BufferSpec will be dropped. 
-static constexpr int kMaxPoolCount = 20; +static constexpr int kMaxPoolCount = 10; +// Time in seconds after which an inactive buffer can be dropped from the pool. +// Currently only used with CVPixelBufferPool. +static constexpr float kMaxInactiveBufferAge = 0.25; +// Skip allocating a buffer pool until at least this many requests have been +// made for a given BufferSpec. +static constexpr int kMinRequestsBeforePool = 2; +// Do a deeper flush every this many requests. +static constexpr int kRequestCountScrubInterval = 50; #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER -GpuBufferMultiPool::SimplePool GpuBufferMultiPool::MakeSimplePool( - const BufferSpec& spec) { +CvPixelBufferPoolWrapper::CvPixelBufferPoolWrapper(const BufferSpec& spec, + CFTimeInterval maxAge) { OSType cv_format = CVPixelFormatForGpuBufferFormat(spec.format); CHECK_NE(cv_format, -1) << "unsupported pixel format"; - return MakeCFHolderAdopting( - CreateCVPixelBufferPool(spec.width, spec.height, cv_format, kKeepCount, - 0.1 /* max age in seconds */)); + pool_ = MakeCFHolderAdopting( + /* keep count is 0 because the age param keeps buffers around anyway */ + CreateCVPixelBufferPool(spec.width, spec.height, cv_format, 0, maxAge)); } -GpuBuffer GpuBufferMultiPool::GetBufferFromSimplePool( - BufferSpec spec, const GpuBufferMultiPool::SimplePool& pool) { -#if TARGET_IPHONE_SIMULATOR - // On the simulator, syncing the texture with the pixelbuffer does not work, - // and we have to use glReadPixels. Since GL_UNPACK_ROW_LENGTH is not - // available in OpenGL ES 2, we should create the buffer so the pixels are - // contiguous. - // - // TODO: verify if we can use kIOSurfaceBytesPerRow to force the - // pool to give us contiguous data. +GpuBuffer CvPixelBufferPoolWrapper::GetBuffer(std::function flush) { + CVPixelBufferRef buffer; + int threshold = 1; + NSMutableDictionary* auxAttributes = + [NSMutableDictionary dictionaryWithCapacity:1]; + CVReturn err; + bool tried_flushing = false; + while (1) { + auxAttributes[(id)kCVPixelBufferPoolAllocationThresholdKey] = @(threshold); + err = CVPixelBufferPoolCreatePixelBufferWithAuxAttributes( + kCFAllocatorDefault, *pool_, (__bridge CFDictionaryRef)auxAttributes, + &buffer); + if (err != kCVReturnWouldExceedAllocationThreshold) break; + if (flush && !tried_flushing) { + // Call the flush function to potentially release old holds on buffers + // and try again to create a pixel buffer. + // This is used to flush CV texture caches, which may retain buffers until + // flushed. 
+ flush(); + tried_flushing = true; + } else { + ++threshold; + } + } + CHECK(!err) << "Error creating pixel buffer: " << err; + count_ = threshold; + return GpuBuffer(MakeCFHolderAdopting(buffer)); +} + +std::string CvPixelBufferPoolWrapper::GetDebugString() const { + auto description = MakeCFHolderAdopting(CFCopyDescription(*pool_)); + return [(__bridge NSString*)*description UTF8String]; +} + +void CvPixelBufferPoolWrapper::Flush() { CVPixelBufferPoolFlush(*pool_, 0); } + +GpuBufferMultiPool::SimplePool GpuBufferMultiPool::MakeSimplePool( + const BufferSpec& spec) { + return std::make_shared(spec, + kMaxInactiveBufferAge); +} + +GpuBuffer GpuBufferMultiPool::GetBufferWithoutPool(const BufferSpec& spec) { OSType cv_format = CVPixelFormatForGpuBufferFormat(spec.format); CHECK_NE(cv_format, -1) << "unsupported pixel format"; CVPixelBufferRef buffer; @@ -61,26 +103,37 @@ GpuBuffer GpuBufferMultiPool::GetBufferFromSimplePool( cv_format, &buffer); CHECK(!err) << "Error creating pixel buffer: " << err; return GpuBuffer(MakeCFHolderAdopting(buffer)); -#else - CVPixelBufferRef buffer; - // TODO: allow the keepCount and the allocation threshold to be set - // by the application, and to be set independently. - static CFDictionaryRef auxAttributes = - CreateCVPixelBufferPoolAuxiliaryAttributesForThreshold(kKeepCount); - CVReturn err = CreateCVPixelBufferWithPool( - *pool, auxAttributes, - [this]() { - for (const auto& cache : texture_caches_) { +} + +void GpuBufferMultiPool::FlushTextureCaches() { + absl::MutexLock lock(&mutex_); + for (const auto& cache : texture_caches_) { #if TARGET_OS_OSX - CVOpenGLTextureCacheFlush(*cache, 0); + CVOpenGLTextureCacheFlush(*cache, 0); #else - CVOpenGLESTextureCacheFlush(*cache, 0); + CVOpenGLESTextureCacheFlush(*cache, 0); #endif // TARGET_OS_OSX - } - }, - &buffer); - CHECK(!err) << "Error creating pixel buffer: " << err; - return GpuBuffer(MakeCFHolderAdopting(buffer)); + } +} + +// Turning this on disables the pixel buffer pools when using the simulator. +// It is no longer necessary, since the helper code now supports non-contiguous +// buffers. We leave the code in for now for the sake of documentation. +#define FORCE_CONTIGUOUS_PIXEL_BUFFER_ON_IPHONE_SIMULATOR 0 + +GpuBuffer GpuBufferMultiPool::GetBufferFromSimplePool( + BufferSpec spec, const GpuBufferMultiPool::SimplePool& pool) { +#if TARGET_IPHONE_SIMULATOR && FORCE_CONTIGUOUS_PIXEL_BUFFER_ON_IPHONE_SIMULATOR + // On the simulator, syncing the texture with the pixelbuffer does not work, + // and we have to use glReadPixels. Since GL_UNPACK_ROW_LENGTH is not + // available in OpenGL ES 2, we should create the buffer so the pixels are + // contiguous. + // + // TODO: verify if we can use kIOSurfaceBytesPerRow to force the + // pool to give us contiguous data. 
+ return GetBufferWithoutPool(spec); +#else + return pool->GetBuffer([this]() { FlushTextureCaches(); }); #endif // TARGET_IPHONE_SIMULATOR } @@ -92,6 +145,11 @@ GpuBufferMultiPool::SimplePool GpuBufferMultiPool::MakeSimplePool( kKeepCount); } +GpuBuffer GpuBufferMultiPool::GetBufferWithoutPool(const BufferSpec& spec) { + return GpuBuffer( + GlTextureBuffer::Create(spec.width, spec.height, spec.format)); +} + GpuBuffer GpuBufferMultiPool::GetBufferFromSimplePool( BufferSpec spec, const GpuBufferMultiPool::SimplePool& pool) { return GpuBuffer(pool->GetBuffer()); @@ -99,42 +157,132 @@ GpuBuffer GpuBufferMultiPool::GetBufferFromSimplePool( #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER -GpuBufferMultiPool::SimplePool GpuBufferMultiPool::GetSimplePool( +void GpuBufferMultiPool::EntryList::Prepend(Entry* entry) { + if (head_ == nullptr) { + head_ = tail_ = entry; + } else { + entry->next = head_; + head_->prev = entry; + head_ = entry; + } + ++size_; +} + +void GpuBufferMultiPool::EntryList::Append(Entry* entry) { + if (tail_ == nullptr) { + head_ = tail_ = entry; + } else { + tail_->next = entry; + entry->prev = tail_; + tail_ = entry; + } + ++size_; +} + +void GpuBufferMultiPool::EntryList::Remove(Entry* entry) { + if (entry == head_) { + head_ = entry->next; + } else { + entry->prev->next = entry->next; + } + if (entry == tail_) { + tail_ = entry->prev; + } else { + entry->next->prev = entry->prev; + } + entry->prev = nullptr; + entry->next = nullptr; + --size_; +} + +void GpuBufferMultiPool::EntryList::InsertAfter(Entry* entry, Entry* after) { + if (after != nullptr) { + entry->next = after->next; + if (entry->next) entry->next->prev = entry; + entry->prev = after; + after->next = entry; + ++size_; + } else + Prepend(entry); +} + +void GpuBufferMultiPool::Evict() { + // Remove excess entries. + while (entry_list_.size() > kMaxPoolCount) { + Entry* victim = entry_list_.tail(); + entry_list_.Remove(victim); + pools_.erase(victim->spec); + } + // Every kRequestCountScrubInterval requests, halve the request counts, and + // remove entries which have fallen to 0. + // This keeps sporadic requests from accumulating and eventually exceeding + // the minimum request threshold for allocating a pool. Also, it means that + // if the request regimen changes (e.g. a graph was always requesting a large + // size, but then switches to a small size to save memory or CPU), the pool + // can quickly adapt to it. + if (total_request_count_ >= kRequestCountScrubInterval) { + total_request_count_ = 0; + VLOG(2) << "begin pool scrub"; + for (Entry* entry = entry_list_.head(); entry != nullptr;) { + VLOG(2) << "entry for: " << entry->spec.width << "x" << entry->spec.height + << " request_count: " << entry->request_count + << " has pool: " << (entry->pool != nullptr); + entry->request_count /= 2; + Entry* next = entry->next; + if (entry->request_count == 0) { + entry_list_.Remove(entry); + pools_.erase(entry->spec); + } + entry = next; + } + } +} + +GpuBufferMultiPool::SimplePool GpuBufferMultiPool::RequestPool( const BufferSpec& key) { absl::MutexLock lock(&mutex_); auto pool_it = pools_.find(key); + Entry* entry; if (pool_it == pools_.end()) { - // Discard the least recently used pool in LRU cache. - if (pools_.size() >= kMaxPoolCount) { - auto old_spec = buffer_specs_.front(); // Front has LRU. - buffer_specs_.pop_front(); - pools_.erase(old_spec); - } - buffer_specs_.push_back(key); // Push new spec to back. 
std::tie(pool_it, std::ignore) = pools_.emplace(std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple(MakeSimplePool(key))); + std::forward_as_tuple(key)); + entry = &pool_it->second; + CHECK_EQ(entry->request_count, 0); + entry->request_count = 1; + entry_list_.Append(entry); + if (entry->prev != nullptr) CHECK_GE(entry->prev->request_count, 1); } else { - // Find and move current 'key' spec to back, keeping others in same order. - auto specs_it = buffer_specs_.begin(); - while (specs_it != buffer_specs_.end()) { - if (*specs_it == key) { - buffer_specs_.erase(specs_it); - break; - } - ++specs_it; + entry = &pool_it->second; + ++entry->request_count; + Entry* larger = entry->prev; + while (larger != nullptr && larger->request_count < entry->request_count) { + larger = larger->prev; + } + if (larger != entry->prev) { + entry_list_.Remove(entry); + entry_list_.InsertAfter(entry, larger); } - buffer_specs_.push_back(key); } - return pool_it->second; + if (!entry->pool && entry->request_count >= kMinRequestsBeforePool) { + entry->pool = MakeSimplePool(key); + } + SimplePool pool = entry->pool; + ++total_request_count_; + Evict(); + return pool; } GpuBuffer GpuBufferMultiPool::GetBuffer(int width, int height, GpuBufferFormat format) { BufferSpec key(width, height, format); - SimplePool pool = GetSimplePool(key); - // Note: we release our multipool lock before accessing the simple pool. - return GetBufferFromSimplePool(key, pool); + SimplePool pool = RequestPool(key); + if (pool) { + // Note: we release our multipool lock before accessing the simple pool. + return GetBufferFromSimplePool(key, pool); + } else { + return GetBufferWithoutPool(key); + } } GpuBufferMultiPool::~GpuBufferMultiPool() { diff --git a/mediapipe/gpu/gpu_buffer_multi_pool.h b/mediapipe/gpu/gpu_buffer_multi_pool.h index 79e5c234b..8039d18a2 100644 --- a/mediapipe/gpu/gpu_buffer_multi_pool.h +++ b/mediapipe/gpu/gpu_buffer_multi_pool.h @@ -74,6 +74,23 @@ struct BufferSpecHash { } }; +#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER +class CvPixelBufferPoolWrapper { + public: + CvPixelBufferPoolWrapper(const BufferSpec& spec, CFTimeInterval maxAge); + GpuBuffer GetBuffer(std::function flush); + + int GetBufferCount() const { return count_; } + std::string GetDebugString() const; + + void Flush(); + + private: + CFHolder pool_; + int count_ = 0; +}; +#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER + class GpuBufferMultiPool { public: GpuBufferMultiPool() {} @@ -93,25 +110,63 @@ class GpuBufferMultiPool { // Remove a texture cache from the list of caches to be flushed. void UnregisterTextureCache(CVTextureCacheType cache); + + void FlushTextureCaches(); #endif // defined(__APPLE__) private: #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER - typedef CFHolder SimplePool; + using SimplePool = std::shared_ptr; #else - typedef std::shared_ptr SimplePool; + using SimplePool = std::shared_ptr; #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER + struct Entry { + Entry(const BufferSpec& spec) : spec(spec) {} + Entry* prev = nullptr; + Entry* next = nullptr; + BufferSpec spec; + int request_count = 0; + SimplePool pool; + }; + + // Unlike std::list, this is an intrusive list, meaning that the prev and next + // pointers live inside the element. Apart from not requiring an extra + // allocation, this means that once we look up an entry by key in the pools_ + // map we do not need to look it up separately in the list. 
+ // + class EntryList { + public: + void Prepend(Entry* entry); + void Append(Entry* entry); + void Remove(Entry* entry); + void InsertAfter(Entry* entry, Entry* after); + + Entry* head() { return head_; } + Entry* tail() { return tail_; } + size_t size() { return size_; } + + private: + Entry* head_ = nullptr; + Entry* tail_ = nullptr; + size_t size_ = 0; + }; + SimplePool MakeSimplePool(const BufferSpec& spec); - SimplePool GetSimplePool(const BufferSpec& key); + // Requests a simple buffer pool for the given spec. This may return nullptr + // if we have not yet reached a sufficient number of requests to allocate a + // pool, in which case the caller should invoke GetBufferWithoutPool instead + // of GetBufferFromSimplePool. + SimplePool RequestPool(const BufferSpec& key); GpuBuffer GetBufferFromSimplePool(BufferSpec spec, const SimplePool& pool); + GpuBuffer GetBufferWithoutPool(const BufferSpec& spec); + void Evict() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); absl::Mutex mutex_; - std::unordered_map pools_ + std::unordered_map pools_ ABSL_GUARDED_BY(mutex_); - // A queue of BufferSpecs to keep track of the age of each BufferSpec added to - // the pool. - std::deque buffer_specs_; + EntryList entry_list_ ABSL_GUARDED_BY(mutex_); + int total_request_count_ = 0; #ifdef __APPLE__ // Texture caches used with this pool. diff --git a/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt b/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt index 81ffd8e28..943bf1767 100644 --- a/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt +++ b/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs GPU Sobel edge detection on a live video stream. # Used in the examples in -# mediapipe/examples/android/src/java/com/mediapipe/apps/skeleton and -# mediapipe/examples/ios/edgedetectiongpu. +# mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:helloworld +# and mediapipe/examples/ios/helloworld. # Images coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/pose_tracking/BUILD b/mediapipe/graphs/pose_tracking/BUILD new file mode 100644 index 000000000..fcb1217f7 --- /dev/null +++ b/mediapipe/graphs/pose_tracking/BUILD @@ -0,0 +1,58 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "upper_body_pose_tracking_gpu_deps", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/graphs/pose_tracking/calculators:landmarks_smoothing_calculator", + "//mediapipe/graphs/pose_tracking/subgraphs:upper_body_pose_renderer_gpu", + "//mediapipe/modules/pose_landmark:pose_landmark_upper_body_gpu", + ], +) + +mediapipe_binary_graph( + name = "upper_body_pose_tracking_gpu_binary_graph", + graph = "upper_body_pose_tracking_gpu.pbtxt", + output_name = "upper_body_pose_tracking_gpu.binarypb", + deps = [":upper_body_pose_tracking_gpu_deps"], +) + +cc_library( + name = "upper_body_pose_tracking_cpu_deps", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/graphs/pose_tracking/calculators:landmarks_smoothing_calculator", + "//mediapipe/graphs/pose_tracking/subgraphs:upper_body_pose_renderer_cpu", + "//mediapipe/modules/pose_landmark:pose_landmark_upper_body_cpu", + ], +) + +mediapipe_binary_graph( + name = "upper_body_pose_tracking_cpu_binary_graph", + graph = "upper_body_pose_tracking_cpu.pbtxt", + output_name = "upper_body_pose_tracking_cpu.binarypb", + deps = [":upper_body_pose_tracking_cpu_deps"], +) diff --git a/mediapipe/graphs/pose_tracking/calculators/BUILD b/mediapipe/graphs/pose_tracking/calculators/BUILD new file mode 100644 index 000000000..fa8d9f41b --- /dev/null +++ b/mediapipe/graphs/pose_tracking/calculators/BUILD @@ -0,0 +1,85 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "low_pass_filter", + srcs = ["low_pass_filter.cc"], + hdrs = ["low_pass_filter.h"], + deps = [ + "//mediapipe/framework/port:logging", + "@com_google_absl//absl/memory", + ], +) + +cc_test( + name = "low_pass_filter_test", + srcs = ["low_pass_filter_test.cc"], + deps = [ + ":low_pass_filter", + "//mediapipe/framework/port:gtest_main", + ], +) + +cc_library( + name = "relative_velocity_filter", + srcs = ["relative_velocity_filter.cc"], + hdrs = ["relative_velocity_filter.h"], + deps = [ + ":low_pass_filter", + "//mediapipe/framework/port:logging", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/time", + ], +) + +cc_test( + name = "relative_velocity_filter_test", + srcs = ["relative_velocity_filter_test.cc"], + deps = [ + ":relative_velocity_filter", + "//mediapipe/framework/port:gtest_main", + "@com_google_absl//absl/time", + ], +) + +mediapipe_proto_library( + name = "landmarks_smoothing_calculator_proto", + srcs = ["landmarks_smoothing_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + ], +) + +cc_library( + name = "landmarks_smoothing_calculator", + srcs = ["landmarks_smoothing_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":landmarks_smoothing_calculator_cc_proto", + ":relative_velocity_filter", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:ret_check", + "@com_google_absl//absl/algorithm:container", + ], + alwayslink = 1, +) diff --git a/mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.cc b/mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.cc new file mode 100644 index 000000000..4b9b0f87b --- /dev/null +++ b/mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.cc @@ -0,0 +1,273 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/algorithm/container.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/timestamp.h" +#include "mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.pb.h" +#include "mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.h" + +namespace mediapipe { + +namespace { + +constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS"; +constexpr char kImageSizeTag[] = "IMAGE_SIZE"; +constexpr char kNormalizedFilteredLandmarksTag[] = "NORM_FILTERED_LANDMARKS"; + +using ::mediapipe::RelativeVelocityFilter; + +// Estimate object scale to use its inverse value as velocity scale for +// RelativeVelocityFilter. 
If value will be too small (less than +// `options_.min_allowed_object_scale`) smoothing will be disabled and +// landmarks will be returned as is. +// Object scale is calculated as average between bounding box width and height +// with sides parallel to axis. +float GetObjectScale(const NormalizedLandmarkList& landmarks, int image_width, + int image_height) { + const auto& [lm_min_x, lm_max_x] = absl::c_minmax_element( + landmarks.landmark(), + [](const auto& a, const auto& b) { return a.x() < b.x(); }); + const float x_min = lm_min_x->x(); + const float x_max = lm_max_x->x(); + + const auto& [lm_min_y, lm_max_y] = absl::c_minmax_element( + landmarks.landmark(), + [](const auto& a, const auto& b) { return a.y() < b.y(); }); + const float y_min = lm_min_y->y(); + const float y_max = lm_max_y->y(); + + const float object_width = (x_max - x_min) * image_width; + const float object_height = (y_max - y_min) * image_height; + + return (object_width + object_height) / 2.0f; +} + +// Abstract class for various landmarks filters. +class LandmarksFilter { + public: + virtual ~LandmarksFilter() = default; + + virtual ::mediapipe::Status Reset() { return ::mediapipe::OkStatus(); } + + virtual ::mediapipe::Status Apply(const NormalizedLandmarkList& in_landmarks, + const std::pair& image_size, + const absl::Duration& timestamp, + NormalizedLandmarkList* out_landmarks) = 0; +}; + +// Returns landmarks as is without smoothing. +class NoFilter : public LandmarksFilter { + public: + ::mediapipe::Status Apply(const NormalizedLandmarkList& in_landmarks, + const std::pair& image_size, + const absl::Duration& timestamp, + NormalizedLandmarkList* out_landmarks) override { + *out_landmarks = in_landmarks; + return ::mediapipe::OkStatus(); + } +}; + +// Please check RelativeVelocityFilter documentation for details. +class VelocityFilter : public LandmarksFilter { + public: + VelocityFilter(int window_size, float velocity_scale, + float min_allowed_object_scale) + : window_size_(window_size), + velocity_scale_(velocity_scale), + min_allowed_object_scale_(min_allowed_object_scale) {} + + ::mediapipe::Status Reset() override { + x_filters_.clear(); + y_filters_.clear(); + z_filters_.clear(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Apply(const NormalizedLandmarkList& in_landmarks, + const std::pair& image_size, + const absl::Duration& timestamp, + NormalizedLandmarkList* out_landmarks) override { + // Get image size. + int image_width; + int image_height; + std::tie(image_width, image_height) = image_size; + + // Get value scale as inverse value of the object scale. + // If value is too small smoothing will be disabled and landmarks will be + // returned as is. + const float object_scale = + GetObjectScale(in_landmarks, image_width, image_height); + if (object_scale < min_allowed_object_scale_) { + *out_landmarks = in_landmarks; + return ::mediapipe::OkStatus(); + } + const float value_scale = 1.0f / object_scale; + + // Initialize filters once. + MP_RETURN_IF_ERROR(InitializeFiltersIfEmpty(in_landmarks.landmark_size())); + + // Filter landmarks. Every axis of every landmark is filtered separately. 
+ for (int i = 0; i < in_landmarks.landmark_size(); ++i) { + const NormalizedLandmark& in_landmark = in_landmarks.landmark(i); + + NormalizedLandmark* out_landmark = out_landmarks->add_landmark(); + out_landmark->set_x(x_filters_[i].Apply(timestamp, value_scale, + in_landmark.x() * image_width) / + image_width); + out_landmark->set_y(y_filters_[i].Apply(timestamp, value_scale, + in_landmark.y() * image_height) / + image_height); + // Scale Z the same way as X (using image width). + out_landmark->set_z(z_filters_[i].Apply(timestamp, value_scale, + in_landmark.z() * image_width) / + image_width); + // Keep visibility as is. + out_landmark->set_visibility(in_landmark.visibility()); + } + + return ::mediapipe::OkStatus(); + } + + private: + // Initializes filters for the first time or after Reset. If initialized then + // check the size. + ::mediapipe::Status InitializeFiltersIfEmpty(const int n_landmarks) { + if (!x_filters_.empty()) { + RET_CHECK_EQ(x_filters_.size(), n_landmarks); + RET_CHECK_EQ(y_filters_.size(), n_landmarks); + RET_CHECK_EQ(z_filters_.size(), n_landmarks); + return ::mediapipe::OkStatus(); + } + + x_filters_.resize(n_landmarks, + RelativeVelocityFilter(window_size_, velocity_scale_)); + y_filters_.resize(n_landmarks, + RelativeVelocityFilter(window_size_, velocity_scale_)); + z_filters_.resize(n_landmarks, + RelativeVelocityFilter(window_size_, velocity_scale_)); + + return ::mediapipe::OkStatus(); + } + + int window_size_; + float velocity_scale_; + float min_allowed_object_scale_; + + std::vector<RelativeVelocityFilter> x_filters_; + std::vector<RelativeVelocityFilter> y_filters_; + std::vector<RelativeVelocityFilter> z_filters_; +}; + +} // namespace + +// A calculator to smooth landmarks over time. +// +// Inputs: +// NORM_LANDMARKS: A NormalizedLandmarkList of landmarks you want to smooth. +// IMAGE_SIZE: A std::pair<int, int> representation of image width and height. +// Required to perform all computations in absolute coordinates to avoid any +// influence of normalized values. +// +// Outputs: +// NORM_FILTERED_LANDMARKS: A NormalizedLandmarkList of smoothed landmarks. +// +// Example config: +// node { +// calculator: "LandmarksSmoothingCalculator" +// input_stream: "NORM_LANDMARKS:pose_landmarks" +// input_stream: "IMAGE_SIZE:image_size" +// output_stream: "NORM_FILTERED_LANDMARKS:pose_landmarks_filtered" +// node_options: { +// [type.googleapis.com/mediapipe.LandmarksSmoothingCalculatorOptions] { +// velocity_filter: { +// window_size: 5 +// velocity_scale: 10.0 +// } +// } +// } +// } +// +class LandmarksSmoothingCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + LandmarksFilter* landmarks_filter_; +}; +REGISTER_CALCULATOR(LandmarksSmoothingCalculator); + +::mediapipe::Status LandmarksSmoothingCalculator::GetContract( + CalculatorContract* cc) { + cc->Inputs().Tag(kNormalizedLandmarksTag).Set<NormalizedLandmarkList>(); + cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>(); + cc->Outputs() + .Tag(kNormalizedFilteredLandmarksTag) + .Set<NormalizedLandmarkList>(); + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status LandmarksSmoothingCalculator::Open(CalculatorContext* cc) { + cc->SetOffset(TimestampDiff(0)); + + // Pick landmarks filter.
+ const auto& options = cc->Options<LandmarksSmoothingCalculatorOptions>(); + if (options.has_no_filter()) { + landmarks_filter_ = new NoFilter(); + } else if (options.has_velocity_filter()) { + landmarks_filter_ = new VelocityFilter( + options.velocity_filter().window_size(), + options.velocity_filter().velocity_scale(), + options.velocity_filter().min_allowed_object_scale()); + } else { + RET_CHECK_FAIL() + << "Landmarks filter is either not specified or not supported"; + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status LandmarksSmoothingCalculator::Process( + CalculatorContext* cc) { + // Check that landmarks are not empty and reset the filter if so. + // Don't emit an empty packet for this timestamp. + if (cc->Inputs().Tag(kNormalizedLandmarksTag).IsEmpty()) { + MP_RETURN_IF_ERROR(landmarks_filter_->Reset()); + return ::mediapipe::OkStatus(); + } + + const auto& in_landmarks = + cc->Inputs().Tag(kNormalizedLandmarksTag).Get<NormalizedLandmarkList>(); + const auto& image_size = + cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>(); + const auto& timestamp = + absl::Microseconds(cc->InputTimestamp().Microseconds()); + + auto out_landmarks = absl::make_unique<NormalizedLandmarkList>(); + MP_RETURN_IF_ERROR(landmarks_filter_->Apply(in_landmarks, image_size, + timestamp, out_landmarks.get())); + + cc->Outputs() + .Tag(kNormalizedFilteredLandmarksTag) + .Add(out_landmarks.release(), cc->InputTimestamp()); + + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.proto b/mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.proto new file mode 100644 index 000000000..9c7dd502b --- /dev/null +++ b/mediapipe/graphs/pose_tracking/calculators/landmarks_smoothing_calculator.proto @@ -0,0 +1,48 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message LandmarksSmoothingCalculatorOptions { + extend CalculatorOptions { + optional LandmarksSmoothingCalculatorOptions ext = 325671429; + } + + // Default behaviour and fast way to disable smoothing. + message NoFilter {} + + message VelocityFilter { + // Number of value changes to keep over time. + // Higher value adds to lag and to stability. + optional int32 window_size = 1 [default = 5]; + + // Scale to apply to the velocity calculated over the given window. With + // higher velocity `low pass filter` weights new values higher. + // Lower value adds to lag and to stability. + optional float velocity_scale = 2 [default = 10.0]; + + // If calculated object scale is less than given value smoothing will be + // disabled and landmarks will be returned as is.
+ optional float min_allowed_object_scale = 3 [default = 1e-6]; + } + + oneof filter_options { + NoFilter no_filter = 1; + VelocityFilter velocity_filter = 2; + } +} diff --git a/mediapipe/graphs/pose_tracking/calculators/low_pass_filter.cc b/mediapipe/graphs/pose_tracking/calculators/low_pass_filter.cc new file mode 100644 index 000000000..6b1bfb149 --- /dev/null +++ b/mediapipe/graphs/pose_tracking/calculators/low_pass_filter.cc @@ -0,0 +1,58 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/graphs/pose_tracking/calculators/low_pass_filter.h" + +#include "absl/memory/memory.h" +#include "mediapipe/framework/port/logging.h" + +namespace mediapipe { + +LowPassFilter::LowPassFilter(float alpha) : initialized_{false} { + SetAlpha(alpha); +} + +float LowPassFilter::Apply(float value) { + float result; + if (initialized_) { + result = alpha_ * value + (1.0 - alpha_) * stored_value_; + } else { + result = value; + initialized_ = true; + } + raw_value_ = value; + stored_value_ = result; + return result; +} + +float LowPassFilter::ApplyWithAlpha(float value, float alpha) { + SetAlpha(alpha); + return Apply(value); +} + +bool LowPassFilter::HasLastRawValue() { return initialized_; } + +float LowPassFilter::LastRawValue() { return raw_value_; } + +float LowPassFilter::LastValue() { return stored_value_; } + +void LowPassFilter::SetAlpha(float alpha) { + if (alpha < 0.0f || alpha > 1.0f) { + LOG(ERROR) << "alpha: " << alpha << " should be in [0.0, 1.0] range"; + return; + } + alpha_ = alpha; +} + +} // namespace mediapipe diff --git a/mediapipe/graphs/pose_tracking/calculators/low_pass_filter.h b/mediapipe/graphs/pose_tracking/calculators/low_pass_filter.h new file mode 100644 index 000000000..d7a63e3e2 --- /dev/null +++ b/mediapipe/graphs/pose_tracking/calculators/low_pass_filter.h @@ -0,0 +1,47 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
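For context, the smoothing performed by the LowPassFilter added above is a standard exponential moving average: with smoothing factor alpha in [0, 1], each output is y = alpha * x + (1 - alpha) * y_prev, and the very first sample is returned unchanged. The following is a minimal standalone sketch of that recurrence (the OnePoleFilter name and the main() driver are illustrative only, not part of this change); it reproduces the values checked in low_pass_filter_test.cc below, e.g. alpha = 0.5 applied to 2 and then 100 yields 51.

    #include <iostream>

    class OnePoleFilter {
     public:
      explicit OnePoleFilter(float alpha) : alpha_(alpha) {}

      float Apply(float value) {
        // First sample passes through; afterwards blend the new value with the
        // previous output: y = alpha * x + (1 - alpha) * y_prev.
        if (!initialized_) {
          state_ = value;
          initialized_ = true;
        } else {
          state_ = alpha_ * value + (1.0f - alpha_) * state_;
        }
        return state_;
      }

     private:
      float alpha_;
      float state_ = 0.0f;
      bool initialized_ = false;
    };

    int main() {
      OnePoleFilter filter(0.5f);
      std::cout << filter.Apply(2.0f) << "\n";    // 2: first sample is returned as is.
      std::cout << filter.Apply(100.0f) << "\n";  // 51 = 0.5 * 100 + 0.5 * 2.
      return 0;
    }

An alpha of 1 makes the filter a passthrough and an alpha of 0 freezes it at the first sample, which is exactly the behavior the three test cases exercise.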
+
+#ifndef MEDIAPIPE_GRAPHS_POSE_TRACKING_CALCULATORS_LOW_PASS_FILTER_H_
+#define MEDIAPIPE_GRAPHS_POSE_TRACKING_CALCULATORS_LOW_PASS_FILTER_H_
+
+#include <memory>
+
+namespace mediapipe {
+
+class LowPassFilter {
+ public:
+  explicit LowPassFilter(float alpha);
+
+  float Apply(float value);
+
+  float ApplyWithAlpha(float value, float alpha);
+
+  bool HasLastRawValue();
+
+  float LastRawValue();
+
+  float LastValue();
+
+ private:
+  void SetAlpha(float alpha);
+
+  float raw_value_;
+  float alpha_;
+  float stored_value_;
+  bool initialized_;
+};
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_GRAPHS_POSE_TRACKING_CALCULATORS_LOW_PASS_FILTER_H_
diff --git a/mediapipe/graphs/pose_tracking/calculators/low_pass_filter_test.cc b/mediapipe/graphs/pose_tracking/calculators/low_pass_filter_test.cc
new file mode 100644
index 000000000..e782d9c48
--- /dev/null
+++ b/mediapipe/graphs/pose_tracking/calculators/low_pass_filter_test.cc
@@ -0,0 +1,35 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/graphs/pose_tracking/calculators/low_pass_filter.h"
+
+#include "mediapipe/framework/port/gtest.h"
+
+namespace mediapipe {
+
+TEST(LowPassFilterTest, LowPassFilterBasicChecks) {
+  auto filter = absl::make_unique<LowPassFilter>(1.0f);
+  EXPECT_EQ(2.0f, filter->Apply(2.0f));
+  EXPECT_EQ(100.0f, filter->Apply(100.0f));
+
+  filter = absl::make_unique<LowPassFilter>(0.0f);
+  EXPECT_EQ(2.0f, filter->Apply(2.0f));
+  EXPECT_EQ(2.0f, filter->Apply(100.0f));
+
+  filter = absl::make_unique<LowPassFilter>(0.5f);
+  EXPECT_EQ(2.0f, filter->Apply(2.0f));
+  EXPECT_EQ(51.0f, filter->Apply(100.0f));
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.cc b/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.cc
new file mode 100644
index 000000000..2a120e3c8
--- /dev/null
+++ b/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.cc
@@ -0,0 +1,85 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.h"
+
+#include <cmath>
+#include <cstdint>
+
+#include "absl/memory/memory.h"
+#include "mediapipe/framework/port/logging.h"
+
+namespace mediapipe {
+
+float RelativeVelocityFilter::Apply(absl::Duration timestamp, float value_scale,
+                                    float value) {
+  const int64_t new_timestamp = absl::ToInt64Nanoseconds(timestamp);
+  if (last_timestamp_ >= new_timestamp) {
+    // Results are unpredictable in this case, so nothing to do but
+    // return the same value.
+    LOG(WARNING) << "New timestamp is equal to or less than the last one.";
+    return value;
+  }
+
+  float alpha;
+  if (last_timestamp_ == -1) {
+    alpha = 1.0;
+  } else {
+    DCHECK(distance_mode_ == DistanceEstimationMode::kLegacyTransition ||
+           distance_mode_ == DistanceEstimationMode::kForceCurrentScale);
+    const float distance =
+        distance_mode_ == DistanceEstimationMode::kLegacyTransition
+            ? value * value_scale -
+                  last_value_ * last_value_scale_  // Original.
+            : value_scale * (value - last_value_);  // Translation invariant.
+
+    const int64_t duration = new_timestamp - last_timestamp_;
+
+    float cumulative_distance = distance;
+    int64_t cumulative_duration = duration;
+
+    // Define the max cumulative duration assuming 30 frames per second is a
+    // good frame rate, so 30 values per second or 1 / 30 of a second is a
+    // good duration per window element.
+    constexpr int64_t kAssumedMaxDuration = 1000000000 / 30;
+    const int64_t max_cumulative_duration =
+        (1 + window_.size()) * kAssumedMaxDuration;
+    for (const auto& el : window_) {
+      if (cumulative_duration + el.duration > max_cumulative_duration) {
+        // This helps in cases when durations are large and outdated
+        // window elements have a bad impact on filtering results.
+        break;
+      }
+      cumulative_distance += el.distance;
+      cumulative_duration += el.duration;
+    }
+
+    constexpr double kNanoSecondsToSecond = 1e-9;
+    const float velocity =
+        cumulative_distance / (cumulative_duration * kNanoSecondsToSecond);
+    alpha = 1.0f - 1.0f / (1.0f + velocity_scale_ * std::abs(velocity));
+    window_.push_front({distance, duration});
+    if (window_.size() > max_window_size_) {
+      window_.pop_back();
+    }
+  }
+
+  last_value_ = value;
+  last_value_scale_ = value_scale;
+  last_timestamp_ = new_timestamp;
+
+  return low_pass_filter_.ApplyWithAlpha(value, alpha);
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.h b/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.h
new file mode 100644
index 000000000..8f68ab062
--- /dev/null
+++ b/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.h
@@ -0,0 +1,90 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
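The core of `RelativeVelocityFilter::Apply` above is the mapping from estimated velocity to smoothing factor, `alpha = 1 - 1 / (1 + velocity_scale * |velocity|)`. A standalone sketch of how that responds to motion (illustration only; `AlphaFromVelocity` and the sample values are invented for this note):

```cpp
#include <cmath>
#include <cstdio>

// Same formula as in RelativeVelocityFilter::Apply: fast motion pushes alpha
// toward 1 (trust new values), slow motion pushes it toward 0 (trust history).
float AlphaFromVelocity(float velocity, float velocity_scale) {
  return 1.0f - 1.0f / (1.0f + velocity_scale * std::abs(velocity));
}

int main() {
  std::printf("slow: %.3f\n", AlphaFromVelocity(0.01f, /*velocity_scale=*/10.0f));  // ~0.091
  std::printf("fast: %.3f\n", AlphaFromVelocity(10.0f, /*velocity_scale=*/10.0f));  // ~0.990
  return 0;
}
```

This is why the smoothing suppresses jitter while a landmark is nearly still, yet stays responsive during fast motion.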
+
+#ifndef MEDIAPIPE_GRAPHS_POSE_TRACKING_CALCULATORS_RELATIVE_VELOCITY_FILTER_H_
+#define MEDIAPIPE_GRAPHS_POSE_TRACKING_CALCULATORS_RELATIVE_VELOCITY_FILTER_H_
+
+#include <cstdint>
+#include <deque>
+
+#include "absl/time/time.h"
+#include "mediapipe/graphs/pose_tracking/calculators/low_pass_filter.h"
+
+namespace mediapipe {
+
+// This filter keeps track (on a window of specified size) of
+// value changes over time, which as a result gives us the velocity of how the
+// value changes over time. With higher velocity it weights new values higher.
+//
+// Use @window_size and @velocity_scale to tweak this filter for your use case.
+//
+// - higher @window_size adds to lag and to stability
+// - lower @velocity_scale adds to lag and to stability
+class RelativeVelocityFilter {
+ public:
+  enum class DistanceEstimationMode {
+    // When the value scale changes, uses a heuristic
+    // that is not translation invariant (see the implementation for details).
+    kLegacyTransition,
+    // The current (i.e. last) value scale is always used for scale estimation.
+    // When using this mode, the filter is translation invariant, i.e.
+    //     Filter(Data + Offset) = Filter(Data) + Offset.
+    kForceCurrentScale,
+
+    kDefault = kLegacyTransition
+  };
+
+ public:
+  RelativeVelocityFilter(size_t window_size, float velocity_scale,
+                         DistanceEstimationMode distance_mode)
+      : max_window_size_{window_size},
+        window_{window_size},
+        velocity_scale_{velocity_scale},
+        distance_mode_{distance_mode} {}
+
+  RelativeVelocityFilter(size_t window_size, float velocity_scale)
+      : RelativeVelocityFilter{window_size, velocity_scale,
+                               DistanceEstimationMode::kDefault} {}
+
+  // Applies the filter to the value.
+  // @timestamp - timestamp associated with the value (for instance,
+  //              timestamp of the frame where you got the value from)
+  // @value_scale - value scale (for instance, if your value is a distance
+  //                detected on a frame, it can look the same on different
+  //                devices but have quite different absolute values due
+  //                to different resolution, you should come up with an
+  //                appropriate parameter for your particular use case)
+  // @value - value to filter
+  float Apply(absl::Duration timestamp, float value_scale, float value);
+
+ private:
+  struct WindowElement {
+    float distance;
+    int64_t duration;
+  };
+
+  float last_value_{0.0};
+  float last_value_scale_{1.0};
+  int64_t last_timestamp_{-1};
+
+  size_t max_window_size_;
+  std::deque<WindowElement> window_;
+  LowPassFilter low_pass_filter_{1.0f};
+  float velocity_scale_;
+  DistanceEstimationMode distance_mode_;
+};
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_GRAPHS_POSE_TRACKING_CALCULATORS_RELATIVE_VELOCITY_FILTER_H_
diff --git a/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter_test.cc b/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter_test.cc
new file mode 100644
index 000000000..ce94a356e
--- /dev/null
+++ b/mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter_test.cc
@@ -0,0 +1,292 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/graphs/pose_tracking/calculators/relative_velocity_filter.h"
+
+#include <chrono>
+#include <cmath>
+#include <vector>
+
+#include "absl/time/time.h"
+#include "mediapipe/framework/port/gtest.h"
+
+namespace mediapipe {
+
+using DistanceEstimationMode =
+    ::mediapipe::RelativeVelocityFilter::DistanceEstimationMode;
+
+absl::Duration DurationFromNanos(int64_t nanos) {
+  return absl::FromChrono(std::chrono::nanoseconds{nanos});
+}
+
+absl::Duration DurationFromMillis(int64_t millis) {
+  return absl::FromChrono(std::chrono::milliseconds{millis});
+}
+
+TEST(RelativeVelocityFilterTest, ApplyIncorrectTimestamp) {
+  auto filter = absl::make_unique<RelativeVelocityFilter>(1, 1.0);
+
+  absl::Duration timestamp1 = DurationFromNanos(1);
+
+  EXPECT_FLOAT_EQ(95.5f, filter->Apply(timestamp1, 0.5f, 95.5f));
+  EXPECT_FLOAT_EQ(200.5f, filter->Apply(timestamp1, 0.5f, 200.5f));
+  EXPECT_FLOAT_EQ(1000.5f, filter->Apply(timestamp1, 0.5f, 1000.5f));
+
+  EXPECT_FLOAT_EQ(2000.0f, filter->Apply(DurationFromNanos(1), 0.5f, 2000.0f));
+}
+
+void TestSameValueScaleDifferentVelocityScales(
+    DistanceEstimationMode distance_mode) {
+  // Changing the distance estimation mode has no effect with constant scales.
+
+  // More sensitive filter.
+  auto filter1 = absl::make_unique<RelativeVelocityFilter>(
+      /*window_size=*/5, /*velocity_scale=*/45.0f,
+      /*distance_mode=*/distance_mode);
+  // Less sensitive filter.
+  auto filter2 = absl::make_unique<RelativeVelocityFilter>(
+      /*window_size=*/5, /*velocity_scale=*/0.1f,
+      /*distance_mode=*/distance_mode);
+
+  float result1;
+  float result2;
+  float value;
+  float value_scale = 1.0f;
+
+  value = 1.0f;
+  result1 = filter1->Apply(DurationFromMillis(1), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(1), value_scale, value);
+  EXPECT_EQ(result1, result2);
+
+  value = 10.0f;
+  result1 = filter1->Apply(DurationFromMillis(2), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(2), value_scale, value);
+  EXPECT_GT(result1, result2);
+
+  value = 2.0f;
+  result1 = filter1->Apply(DurationFromMillis(3), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(3), value_scale, value);
+  EXPECT_LT(result1, result2);
+
+  value = 20.0f;
+  result1 = filter1->Apply(DurationFromMillis(4), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(4), value_scale, value);
+  EXPECT_GT(result1, result2);
+
+  value = 10.0f;
+  result1 = filter1->Apply(DurationFromMillis(5), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(5), value_scale, value);
+  EXPECT_LT(result1, result2);
+
+  value = 50.0f;
+  result1 = filter1->Apply(DurationFromMillis(6), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(6), value_scale, value);
+  EXPECT_GT(result1, result2);
+
+  value = 30.0f;
+  result1 = filter1->Apply(DurationFromMillis(7), value_scale, value);
+  result2 = filter2->Apply(DurationFromMillis(7), value_scale, value);
+  EXPECT_LT(result1, result2);
+}
+
+TEST(RelativeVelocityFilterTest, SameValueScaleDifferentVelocityScalesLegacy) {
+  TestSameValueScaleDifferentVelocityScales(
+      DistanceEstimationMode::kLegacyTransition);
+}
+
+TEST(RelativeVelocityFilterTest,
+     SameValueScaleDifferentVelocityScalesForceCurrentScale) {
+  TestSameValueScaleDifferentVelocityScales(
+      DistanceEstimationMode::kForceCurrentScale);
+}
+
+void TestDifferentConstantValueScalesSameVelocityScale(
+    DistanceEstimationMode distance_mode) {
+  const float same_velocity_scale = 1.0f;
+  auto filter1 = absl::make_unique<RelativeVelocityFilter>(
+      /*window_size=*/3, /*velocity_scale=*/same_velocity_scale,
+      /*distance_mode=*/distance_mode);
+  auto filter2 = absl::make_unique<RelativeVelocityFilter>(
+      /*window_size=*/3, /*velocity_scale=*/same_velocity_scale,
+      /*distance_mode=*/distance_mode);
+
+  float result1;
+  float result2;
+  float value;
+  // A smaller value scale will decrease the cumulative speed and alpha,
+  // so with a smaller scale and the same other params the filter will believe
+  // new values a little bit less.
+  float value_scale1 = 0.5f;
+  float value_scale2 = 1.0f;
+
+  value = 1.0f;
+  result1 = filter1->Apply(DurationFromMillis(1), value_scale1, value);
+  result2 = filter2->Apply(DurationFromMillis(1), value_scale2, value);
+  EXPECT_EQ(result1, result2);
+
+  value = 10.0f;
+  result1 = filter1->Apply(DurationFromMillis(2), value_scale1, value);
+  result2 = filter2->Apply(DurationFromMillis(2), value_scale2, value);
+  EXPECT_LT(result1, result2);
+
+  value = 2.0f;
+  result1 = filter1->Apply(DurationFromMillis(3), value_scale1, value);
+  result2 = filter2->Apply(DurationFromMillis(3), value_scale2, value);
+  EXPECT_GT(result1, result2);
+
+  value = 20.0f;
+  result1 = filter1->Apply(DurationFromMillis(4), value_scale1, value);
+  result2 = filter2->Apply(DurationFromMillis(4), value_scale2, value);
+  EXPECT_LT(result1, result2);
+}
+
+TEST(RelativeVelocityFilterTest,
+     DifferentConstantValueScalesSameVelocityScale) {
+  TestDifferentConstantValueScalesSameVelocityScale(
+      DistanceEstimationMode::kLegacyTransition);
+}
+
+TEST(RelativeVelocityFilterTest, ApplyCheckValueScales) {
+  TestDifferentConstantValueScalesSameVelocityScale(
+      DistanceEstimationMode::kForceCurrentScale);
+}
+
+void TestTranslationInvariance(DistanceEstimationMode distance_mode) {
+  struct ValueAtScale {
+    float value;
+    float scale;
+  };
+
+  // Note that the scales change over time.
+  std::vector<ValueAtScale> original_data_points{
+      // clang-format off
+      {.value = 1.0f, .scale = 0.5f},
+      {.value = 10.0f, .scale = 5.0f},
+      {.value = 20.0f, .scale = 10.0f},
+      {.value = 30.0f, .scale = 15.0f},
+      {.value = 40.0f, .scale = 0.5f},
+      {.value = 50.0f, .scale = 0.5f},
+      {.value = 60.0f, .scale = 5.0f},
+      {.value = 70.0f, .scale = 10.0f},
+      {.value = 80.0f, .scale = 15.0f},
+      {.value = 90.0f, .scale = 5.0f},
+      {.value = 70.0f, .scale = 10.0f},
+      {.value = 50.0f, .scale = 15.0f},
+      {.value = 80.0f, .scale = 15.0f},
+      // clang-format on
+  };
+
+  // The amount by which the input values are uniformly translated.
+  const float kValueOffset = 100.0f;
+
+  // The uniform time delta.
+  const absl::Duration time_delta = DurationFromMillis(1);
+
+  // The filter parameters are the same between the two filters.
+  const size_t kWindowSize = 5;
+  const float kVelocityScale = 0.1f;
+
+  // Perform the translation.
+  std::vector<ValueAtScale> translated_data_points = original_data_points;
+  for (auto& point : translated_data_points) {
+    point.value += kValueOffset;
+  }
+
+  auto original_points_filter = absl::make_unique<RelativeVelocityFilter>(
+      /*window_size=*/kWindowSize, /*velocity_scale=*/kVelocityScale,
+      /*distance_mode=*/distance_mode);
+  auto translated_points_filter = absl::make_unique<RelativeVelocityFilter>(
+      /*window_size=*/kWindowSize, /*velocity_scale=*/kVelocityScale,
+      /*distance_mode=*/distance_mode);
+
+  // The minimal difference which is considered a divergence.
+  const float kDivergenceGap = 0.001f;
+  // The number of times this gap is achieved with `kLegacyTransition`.
+  // Note that on the first iteration the filters should output the unfiltered
+  // input values, so no divergence should occur.
+ // This amount obviously depends on the values in `original_data_points`, + // so should be changed accordingly when they are updated. + const size_t kDivergenceTimes = 5; + + // The minimal difference which is considered a large divergence. + const float kLargeDivergenceGap = 10.0f; + // The amount of times it is achieved. + // This amount obviously depends on the values in `original_data_points`, + // so should be changed accordingly when they are updated. + const size_t kLargeDivergenceTimes = 1; + + // In contrast, the new mode delivers this error bound across all the samples. + const float kForceCurrentScaleAbsoluteError = 1.53e-05f; + + size_t times_diverged = 0; + size_t times_largely_diverged = 0; + absl::Duration timestamp; + for (size_t iteration = 0; iteration < original_data_points.size(); + ++iteration, timestamp += time_delta) { + const ValueAtScale& original_data_point = original_data_points[iteration]; + const float filtered_original_value = + original_points_filter->Apply(/*timestamp=*/timestamp, + /*value_scale=*/original_data_point.scale, + /*value=*/original_data_point.value); + + const ValueAtScale& translated_data_point = + translated_data_points[iteration]; + const float actual_filtered_translated_value = + translated_points_filter->Apply( + /*timestamp=*/timestamp, + /*value_scale=*/translated_data_point.scale, + /*value=*/translated_data_point.value); + + const float expected_filtered_translated_value = + filtered_original_value + kValueOffset; + + const float difference = std::fabs(actual_filtered_translated_value - + expected_filtered_translated_value); + if (iteration == 0) { + // On the first iteration, the unfiltered values are returned. + EXPECT_EQ(filtered_original_value, original_data_point.value); + EXPECT_EQ(actual_filtered_translated_value, translated_data_point.value); + EXPECT_EQ(difference, 0.0f); + } else if (distance_mode == DistanceEstimationMode::kLegacyTransition) { + if (difference >= kDivergenceGap) { + ++times_diverged; + } + if (difference >= kLargeDivergenceGap) { + ++times_largely_diverged; + } + } else { + CHECK(distance_mode == DistanceEstimationMode::kForceCurrentScale); + EXPECT_NEAR(difference, 0.0f, kForceCurrentScaleAbsoluteError); + } + } + + if (distance_mode == DistanceEstimationMode::kLegacyTransition) { + EXPECT_GE(times_diverged, kDivergenceTimes); + EXPECT_GE(times_largely_diverged, kLargeDivergenceTimes); + } +} + +// This test showcases an undesired property of the current filter design +// that manifests itself when value scales change in time. It turns out that +// the velocity estimation starts depending on the distance from the origin. +TEST(RelativeVelocityFilterTest, + TestLegacyFilterModeIsNotTranslationInvariant) { + TestTranslationInvariance(DistanceEstimationMode::kLegacyTransition); +} + +TEST(RelativeVelocityFilterTest, TestOtherFilterModeIsTranslationInvariant) { + TestTranslationInvariance(DistanceEstimationMode::kForceCurrentScale); +} + +} // namespace mediapipe diff --git a/mediapipe/graphs/pose_tracking/subgraphs/BUILD b/mediapipe/graphs/pose_tracking/subgraphs/BUILD new file mode 100644 index 000000000..0d7a5aa5d --- /dev/null +++ b/mediapipe/graphs/pose_tracking/subgraphs/BUILD @@ -0,0 +1,50 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "upper_body_pose_renderer_gpu", + graph = "upper_body_pose_renderer_gpu.pbtxt", + register_as = "UpperBodyPoseRendererGpu", + deps = [ + "//mediapipe/calculators/core:split_normalized_landmark_list_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_scale_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "upper_body_pose_renderer_cpu", + graph = "upper_body_pose_renderer_cpu.pbtxt", + register_as = "UpperBodyPoseRendererCpu", + deps = [ + "//mediapipe/calculators/core:split_normalized_landmark_list_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_scale_calculator", + ], +) diff --git a/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_cpu.pbtxt b/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_cpu.pbtxt new file mode 100644 index 000000000..5878dccde --- /dev/null +++ b/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_cpu.pbtxt @@ -0,0 +1,254 @@ +# MediaPipe pose landmarks rendering subgraph. + +type: "UpperBodyPoseRendererCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:input_image" +# Pose landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:pose_landmarks" +# Region of interest calculated based on landmarks. (NormalizedRect) +input_stream: "ROI:roi" +# Detected pose. (Detection) +input_stream: "DETECTION:detection" + +# CPU image with rendered data. (ImageFrame) +output_stream: "IMAGE:output_image" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:input_image" + output_stream: "SIZE:image_size" +} + +# Calculates rendering scale based on the pose roi. +node { + calculator: "RectToRenderScaleCalculator" + input_stream: "NORM_RECT:roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "RENDER_SCALE:render_scale" + node_options: { + [type.googleapis.com/mediapipe.RectToRenderScaleCalculatorOptions] { + multiplier: 0.0012 + } + } +} + +# Converts detections to drawing primitives for annotation overlay. 
+node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTION:detection" + output_stream: "RENDER_DATA:detection_render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 0 g: 255 b: 0 } + } + } +} + +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "visible_pose_landmarks" + node_options: { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 0 end: 25 } + } + } +} + +# Converts landmarks to drawing primitives for annotation overlay. +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:pose_landmarks" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_connections: 0 + landmark_connections: 1 + landmark_connections: 1 + landmark_connections: 2 + landmark_connections: 2 + landmark_connections: 3 + landmark_connections: 3 + landmark_connections: 7 + landmark_connections: 0 + landmark_connections: 4 + landmark_connections: 4 + landmark_connections: 5 + landmark_connections: 5 + landmark_connections: 6 + landmark_connections: 6 + landmark_connections: 8 + landmark_connections: 9 + landmark_connections: 10 + landmark_connections: 11 + landmark_connections: 12 + landmark_connections: 11 + landmark_connections: 13 + landmark_connections: 13 + landmark_connections: 15 + landmark_connections: 15 + landmark_connections: 17 + landmark_connections: 15 + landmark_connections: 19 + landmark_connections: 15 + landmark_connections: 21 + landmark_connections: 17 + landmark_connections: 19 + landmark_connections: 12 + landmark_connections: 14 + landmark_connections: 14 + landmark_connections: 16 + landmark_connections: 16 + landmark_connections: 18 + landmark_connections: 16 + landmark_connections: 20 + landmark_connections: 16 + landmark_connections: 22 + landmark_connections: 18 + landmark_connections: 20 + landmark_connections: 11 + landmark_connections: 23 + landmark_connections: 12 + landmark_connections: 24 + landmark_connections: 23 + landmark_connections: 24 + + landmark_color { r: 255 g: 255 b: 255 } + connection_color { r: 255 g: 255 b: 255 } + thickness: 3.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Take left pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "landmarks_left_side" + node_options: { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 1 end: 4 } + ranges: { begin: 7 end: 8 } + ranges: { begin: 9 end: 10 } + ranges: { begin: 11 end: 12 } + ranges: { begin: 13 end: 14 } + ranges: { begin: 15 end: 16 } + ranges: { begin: 17 end: 18 } + ranges: { begin: 19 end: 20 } + ranges: { begin: 21 end: 22 } + ranges: { begin: 23 end: 24 } + + combine_outputs: true + } + } +} + +# Take right pose landmarks. 
+node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "landmarks_right_side" + node_options: { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 4 end: 7 } + ranges: { begin: 8 end: 9 } + ranges: { begin: 10 end: 11 } + ranges: { begin: 12 end: 13 } + ranges: { begin: 14 end: 15 } + ranges: { begin: 16 end: 17 } + ranges: { begin: 18 end: 19 } + ranges: { begin: 20 end: 21 } + ranges: { begin: 22 end: 23 } + ranges: { begin: 24 end: 25 } + + combine_outputs: true + } + } +} + +# Render pose joints as big white circles. +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:visible_pose_landmarks" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_background_joints_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 255 g: 255 b: 255 } + connection_color { r: 255 g: 255 b: 255 } + thickness: 5.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Render pose left side joints as orange circles (inside white ones). +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:landmarks_left_side" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_left_joints_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 255 g: 138 b: 0 } + connection_color { r: 255 g: 138 b: 0 } + thickness: 3.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Render pose right side joints as cyan circles (inside white ones). +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:landmarks_right_side" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_right_joints_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 0 g: 217 b: 231 } + connection_color { r: 0 g: 217 b: 231 } + thickness: 3.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Converts normalized rects to drawing primitives for annotation overlay. +node { + calculator: "RectToRenderDataCalculator" + input_stream: "NORM_RECT:roi" + output_stream: "RENDER_DATA:roi_render_data" + node_options: { + [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] { + filled: false + color { r: 255 g: 0 b: 0 } + thickness: 4.0 + } + } +} + +# Draws annotations and overlays them on top of the input images. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "IMAGE:input_image" + input_stream: "detection_render_data" + input_stream: "landmarks_render_data" + input_stream: "landmarks_background_joints_render_data" + input_stream: "landmarks_left_joints_render_data" + input_stream: "landmarks_right_joints_render_data" + input_stream: "roi_render_data" + output_stream: "IMAGE:output_image" +} diff --git a/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_gpu.pbtxt b/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_gpu.pbtxt new file mode 100644 index 000000000..aed1044bc --- /dev/null +++ b/mediapipe/graphs/pose_tracking/subgraphs/upper_body_pose_renderer_gpu.pbtxt @@ -0,0 +1,254 @@ +# MediaPipe pose landmarks rendering subgraph. 
+ +type: "UpperBodyPoseRendererGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:input_image" +# Pose landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:pose_landmarks" +# Region of interest calculated based on landmarks. (NormalizedRect) +input_stream: "ROI:roi" +# Detected pose. (Detection) +input_stream: "DETECTION:detection" + +# GPU image with rendered data. (GpuBuffer) +output_stream: "IMAGE:output_image" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:input_image" + output_stream: "SIZE:image_size" +} + +# Calculates rendering scale based on the pose roi. +node { + calculator: "RectToRenderScaleCalculator" + input_stream: "NORM_RECT:roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "RENDER_SCALE:render_scale" + node_options: { + [type.googleapis.com/mediapipe.RectToRenderScaleCalculatorOptions] { + multiplier: 0.0012 + } + } +} + +# Converts detections to drawing primitives for annotation overlay. +node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTION:detection" + output_stream: "RENDER_DATA:detection_render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 0 g: 255 b: 0 } + } + } +} + +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "visible_pose_landmarks" + node_options: { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 0 end: 25 } + } + } +} + +# Converts landmarks to drawing primitives for annotation overlay. +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:pose_landmarks" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_connections: 0 + landmark_connections: 1 + landmark_connections: 1 + landmark_connections: 2 + landmark_connections: 2 + landmark_connections: 3 + landmark_connections: 3 + landmark_connections: 7 + landmark_connections: 0 + landmark_connections: 4 + landmark_connections: 4 + landmark_connections: 5 + landmark_connections: 5 + landmark_connections: 6 + landmark_connections: 6 + landmark_connections: 8 + landmark_connections: 9 + landmark_connections: 10 + landmark_connections: 11 + landmark_connections: 12 + landmark_connections: 11 + landmark_connections: 13 + landmark_connections: 13 + landmark_connections: 15 + landmark_connections: 15 + landmark_connections: 17 + landmark_connections: 15 + landmark_connections: 19 + landmark_connections: 15 + landmark_connections: 21 + landmark_connections: 17 + landmark_connections: 19 + landmark_connections: 12 + landmark_connections: 14 + landmark_connections: 14 + landmark_connections: 16 + landmark_connections: 16 + landmark_connections: 18 + landmark_connections: 16 + landmark_connections: 20 + landmark_connections: 16 + landmark_connections: 22 + landmark_connections: 18 + landmark_connections: 20 + landmark_connections: 11 + landmark_connections: 23 + landmark_connections: 12 + landmark_connections: 24 + landmark_connections: 23 + landmark_connections: 24 + + landmark_color { r: 255 g: 255 b: 255 } + connection_color { r: 255 g: 255 b: 255 } + thickness: 3.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Take left pose landmarks. 
+node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "landmarks_left_side" + node_options: { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 1 end: 4 } + ranges: { begin: 7 end: 8 } + ranges: { begin: 9 end: 10 } + ranges: { begin: 11 end: 12 } + ranges: { begin: 13 end: 14 } + ranges: { begin: 15 end: 16 } + ranges: { begin: 17 end: 18 } + ranges: { begin: 19 end: 20 } + ranges: { begin: 21 end: 22 } + ranges: { begin: 23 end: 24 } + + combine_outputs: true + } + } +} + +# Take right pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "landmarks_right_side" + node_options: { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 4 end: 7 } + ranges: { begin: 8 end: 9 } + ranges: { begin: 10 end: 11 } + ranges: { begin: 12 end: 13 } + ranges: { begin: 14 end: 15 } + ranges: { begin: 16 end: 17 } + ranges: { begin: 18 end: 19 } + ranges: { begin: 20 end: 21 } + ranges: { begin: 22 end: 23 } + ranges: { begin: 24 end: 25 } + + combine_outputs: true + } + } +} + +# Render pose joints as big white circles. +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:visible_pose_landmarks" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_background_joints_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 255 g: 255 b: 255 } + connection_color { r: 255 g: 255 b: 255 } + thickness: 5.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Render pose left side joints as orange circles (inside white ones). +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:landmarks_left_side" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_left_joints_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 255 g: 138 b: 0 } + connection_color { r: 255 g: 138 b: 0 } + thickness: 3.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Render pose right side joints as cyan circles (inside white ones). +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:landmarks_right_side" + input_stream: "RENDER_SCALE:render_scale" + output_stream: "RENDER_DATA:landmarks_right_joints_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_color { r: 0 g: 217 b: 231 } + connection_color { r: 0 g: 217 b: 231 } + thickness: 3.0 + visualize_landmark_depth: false + utilize_visibility: true + visibility_threshold: 0.1 + } + } +} + +# Converts normalized rects to drawing primitives for annotation overlay. +node { + calculator: "RectToRenderDataCalculator" + input_stream: "NORM_RECT:roi" + output_stream: "RENDER_DATA:roi_render_data" + node_options: { + [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] { + filled: false + color { r: 255 g: 0 b: 0 } + thickness: 4.0 + } + } +} + +# Draws annotations and overlays them on top of the input images. 
+node { + calculator: "AnnotationOverlayCalculator" + input_stream: "IMAGE_GPU:input_image" + input_stream: "detection_render_data" + input_stream: "landmarks_render_data" + input_stream: "landmarks_background_joints_render_data" + input_stream: "landmarks_left_joints_render_data" + input_stream: "landmarks_right_joints_render_data" + input_stream: "roi_render_data" + output_stream: "IMAGE_GPU:output_image" +} diff --git a/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_cpu.pbtxt b/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_cpu.pbtxt new file mode 100644 index 000000000..4e4b5da38 --- /dev/null +++ b/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_cpu.pbtxt @@ -0,0 +1,72 @@ +# MediaPipe graph that performs upper-body pose tracking with TensorFlow Lite on CPU. + +# CPU buffer. (ImageFrame) +input_stream: "input_video" + +# Output image with rendered results. (ImageFrame) +output_stream: "output_video" +# Pose landmarks. (NormalizedLandmarkList) +output_stream: "pose_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Subgraph that detects poses and corresponding landmarks. +node { + calculator: "PoseLandmarkUpperBodyCpu" + input_stream: "IMAGE:throttled_input_video" + output_stream: "LANDMARKS:pose_landmarks" + output_stream: "DETECTION:pose_detection" + output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks" +} + +# Calculates size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:throttled_input_video" + output_stream: "SIZE:image_size" +} + +# Smoothes pose landmarks in order to reduce jitter. +node { + calculator: "LandmarksSmoothingCalculator" + input_stream: "NORM_LANDMARKS:pose_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_FILTERED_LANDMARKS:pose_landmarks_smoothed" + node_options: { + [type.googleapis.com/mediapipe.LandmarksSmoothingCalculatorOptions] { + velocity_filter: { + window_size: 5 + velocity_scale: 10.0 + } + } + } +} + +# Subgraph that renders pose-landmark annotation onto the input image. 
+node { + calculator: "UpperBodyPoseRendererCpu" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:pose_landmarks_smoothed" + input_stream: "ROI:roi_from_landmarks" + input_stream: "DETECTION:pose_detection" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt b/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt new file mode 100644 index 000000000..5f6084690 --- /dev/null +++ b/mediapipe/graphs/pose_tracking/upper_body_pose_tracking_gpu.pbtxt @@ -0,0 +1,72 @@ +# MediaPipe graph that performs upper-body pose tracking with TensorFlow Lite on GPU. + +# GPU buffer. (GpuBuffer) +input_stream: "input_video" + +# Output image with rendered results. (GpuBuffer) +output_stream: "output_video" +# Pose landmarks. (NormalizedLandmarkList) +output_stream: "pose_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Subgraph that detects poses and corresponding landmarks. +node { + calculator: "PoseLandmarkUpperBodyGpu" + input_stream: "IMAGE:throttled_input_video" + output_stream: "LANDMARKS:pose_landmarks" + output_stream: "DETECTION:pose_detection" + output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks" +} + +# Calculates size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:throttled_input_video" + output_stream: "SIZE:image_size" +} + +# Smoothes pose landmarks in order to reduce jitter. +node { + calculator: "LandmarksSmoothingCalculator" + input_stream: "NORM_LANDMARKS:pose_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_FILTERED_LANDMARKS:pose_landmarks_smoothed" + node_options: { + [type.googleapis.com/mediapipe.LandmarksSmoothingCalculatorOptions] { + velocity_filter: { + window_size: 5 + velocity_scale: 10.0 + } + } + } +} + +# Subgraph that renders pose-landmark annotation onto the input image. 
+node { + calculator: "UpperBodyPoseRendererGpu" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:pose_landmarks_smoothed" + input_stream: "ROI:roi_from_landmarks" + input_stream: "DETECTION:pose_detection" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/models/README.md b/mediapipe/models/README.md index 50246e92d..9222cd87d 100644 --- a/mediapipe/models/README.md +++ b/mediapipe/models/README.md @@ -23,6 +23,28 @@ * [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html) * [Model card](https://mediapipe.page.link/handmc) +### Iris + * Iris landmarks: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/iris_landmark.tflite) + * Paper: + [Real-time Pupil Tracking from Monocular Video for Digital Puppetry](https://arxiv.org/abs/2006.11341) + ([presentation](https://youtu.be/cIhXkiiapQI)) + * Google AI Blog: + [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation](https://ai.googleblog.com/2020/08/mediapipe-iris-real-time-iris-tracking.html) + * [Model card](https://mediapipe.page.link/iris-mc) + +### Pose + * Pose detection: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection.tflite) + * Upper-body pose landmarks: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite) + * Paper: + [BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204) + ([presentation](https://youtu.be/YPpUOTRn5tA)) + * Google AI Blog: + [BlazePose - On-device Real-time Body Pose Tracking](https://mediapipe.page.link/blazepose-blog) + * [Model card](https://mediapipe.page.link/blazepose-mc) + ### Hair Segmentation * [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite) * [Model page](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation) diff --git a/mediapipe/modules/README.md b/mediapipe/modules/README.md index 219c4f591..c38ff9a50 100644 --- a/mediapipe/modules/README.md +++ b/mediapipe/modules/README.md @@ -9,4 +9,5 @@ Each module (represented as a subfolder) provides subgraphs and corresponding re | [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. | | [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. | | [`iris_landmark`](iris_landmark/README.md) | Subgraphs to detect iris landmarks. | - +| [`pose_detection`](pose_detection/README.md) | Subgraphs to detect poses. | +| [`pose_landmark`](pose_landmark/README.md) | Subgraphs to detect and track pose landmarks. | diff --git a/mediapipe/modules/pose_detection/BUILD b/mediapipe/modules/pose_detection/BUILD new file mode 100644 index 000000000..9701bfcb6 --- /dev/null +++ b/mediapipe/modules/pose_detection/BUILD @@ -0,0 +1,58 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "pose_detection_cpu", + graph = "pose_detection_cpu.pbtxt", + register_as = "PoseDetectionCpu", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_detection_gpu", + graph = "pose_detection_gpu.pbtxt", + register_as = "PoseDetectionGpu", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +exports_files( + srcs = [ + "pose_detection.tflite", + ], +) diff --git a/mediapipe/modules/pose_detection/README.md b/mediapipe/modules/pose_detection/README.md new file mode 100644 index 000000000..e2e3b2f24 --- /dev/null +++ b/mediapipe/modules/pose_detection/README.md @@ -0,0 +1,7 @@ +# pose_detection + +Subgraphs|Details +:--- | :--- +[`PoseDetectionCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt)| Detects poses. (CPU input, and inference is executed on CPU.) +[`PoseDetectionGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt)| Detects poses. (GPU input, and inference is executed on GPU.) + diff --git a/mediapipe/modules/pose_detection/pose_detection.tflite b/mediapipe/modules/pose_detection/pose_detection.tflite new file mode 100755 index 000000000..ababe424a Binary files /dev/null and b/mediapipe/modules/pose_detection/pose_detection.tflite differ diff --git a/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt b/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt new file mode 100644 index 000000000..a0e6a152c --- /dev/null +++ b/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt @@ -0,0 +1,155 @@ +# MediaPipe graph to detect poses. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseDetectionCpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:pose_detections" +# } + +type: "PoseDetectionCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Detected poses. (std::vector) +# Bounding box in each pose detection is currently set to the bounding box of +# the detected face. However, 4 additional key points are available in each +# detection, which are used to further calculate a (rotated) bounding box that +# encloses the body region of interest. 
Among the 4 key points, the first two +# are for identifying the full-body region, and the second two for upper body +# only: +# +# Key point 0 - mid hip center +# Key point 1 - point that encodes size & rotation (for full body) +# Key point 2 - mid shoulder center +# Key point 3 - point that encodes size & rotation (for upper body) +# +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of poses detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms the input image on CPU to a 128x128 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:image" + output_stream: "IMAGE:transformed_image" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 128 + output_height: 128 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on CPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_image" + output_stream: "TENSORS:input_tensors" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/pose_detection/pose_detection.tflite" + delegate { xnnpack {} } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 128 + input_size_width: 128 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 896 + num_coords: 12 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 4 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. 
+node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt new file mode 100644 index 000000000..b75397bc2 --- /dev/null +++ b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt @@ -0,0 +1,155 @@ +# MediaPipe graph to detect poses. (GPU input, and inference is executed on +# GPU.) +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseDetectionGpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:pose_detections" +# } + +type: "PoseDetectionGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Detected poses. (std::vector) +# Bounding box in each pose detection is currently set to the bounding box of +# the detected face. However, 4 additional key points are available in each +# detection, which are used to further calculate a (rotated) bounding box that +# encloses the body region of interest. Among the 4 key points, the first two +# are for identifying the full-body region, and the second two for upper body +# only: +# +# Key point 0 - mid hip center +# Key point 1 - point that encodes size & rotation (for full body) +# Key point 2 - mid shoulder center +# Key point 3 - point that encodes size & rotation (for upper body) +# +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of poses detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms the input image on GPU to a 128x128 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "IMAGE_GPU:transformed_image" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 128 + output_height: 128 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_image" + output_stream: "TENSORS_GPU:input_tensors" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. 
+node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:input_tensors" + # TODO: we can use TENSORS_GPU here and in the downstream calculator + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/pose_detection/pose_detection.tflite" + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 128 + input_size_width: 128 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 896 + num_coords: 12 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 4 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/pose_landmark/BUILD b/mediapipe/modules/pose_landmark/BUILD new file mode 100644 index 000000000..c75d70b06 --- /dev/null +++ b/mediapipe/modules/pose_landmark/BUILD @@ -0,0 +1,124 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
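Both pose detection graphs above finish with a `NonMaxSuppressionCalculator` configured with `overlap_type: INTERSECTION_OVER_UNION`, `min_suppression_threshold: 0.3` and `algorithm: WEIGHTED`. For reference, a sketch of the standard IoU overlap measure that configuration relies on (the `Box`/`IoU` names are illustrative, not MediaPipe APIs):

```cpp
#include <algorithm>

struct Box {
  float xmin, ymin, xmax, ymax;
};

// Intersection-over-union of two axis-aligned boxes, in [0, 1].
float IoU(const Box& a, const Box& b) {
  const float iw = std::max(0.0f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
  const float ih = std::max(0.0f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
  const float inter = iw * ih;
  const float union_area =
      (a.xmax - a.xmin) * (a.ymax - a.ymin) +
      (b.xmax - b.xmin) * (b.ymax - b.ymin) - inter;
  return union_area > 0.0f ? inter / union_area : 0.0f;
}
```

With `min_suppression_threshold: 0.3`, candidates whose IoU with a higher-scoring detection exceeds 0.3 are suppressed; the WEIGHTED algorithm blends such overlapping candidates by score rather than discarding them outright.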
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "pose_landmark_upper_body_by_roi_gpu", + graph = "pose_landmark_upper_body_by_roi_gpu.pbtxt", + register_as = "PoseLandmarkUpperBodyByRoiGpu", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_cropping_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_upper_body_by_roi_cpu", + graph = "pose_landmark_upper_body_by_roi_cpu.pbtxt", + register_as = "PoseLandmarkUpperBodyByRoiCpu", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_cropping_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_upper_body_gpu", + graph = "pose_landmark_upper_body_gpu.pbtxt", + register_as = "PoseLandmarkUpperBodyGpu", + deps = [ + ":pose_detection_to_roi", + ":pose_landmark_upper_body_by_roi_gpu", + ":pose_landmark_upper_body_landmarks_to_roi", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:merge_calculator", + "//mediapipe/calculators/core:packet_presence_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/pose_detection:pose_detection_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_upper_body_cpu", + graph = "pose_landmark_upper_body_cpu.pbtxt", + register_as = "PoseLandmarkUpperBodyCpu", + deps = [ + ":pose_detection_to_roi", + ":pose_landmark_upper_body_by_roi_cpu", + ":pose_landmark_upper_body_landmarks_to_roi", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:merge_calculator", + "//mediapipe/calculators/core:packet_presence_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/pose_detection:pose_detection_cpu", + ], +) + +exports_files( + srcs = [ + "pose_landmark_upper_body.tflite", + ], +) + +mediapipe_simple_subgraph( + name = "pose_detection_to_roi", + 
graph = "pose_detection_to_roi.pbtxt", + register_as = "PoseDetectionToRoi", + deps = [ + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_upper_body_landmarks_to_roi", + graph = "pose_landmark_upper_body_landmarks_to_roi.pbtxt", + register_as = "PoseLandmarkUpperBodyLandmarksToRoi", + deps = [ + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) diff --git a/mediapipe/modules/pose_landmark/README.md b/mediapipe/modules/pose_landmark/README.md new file mode 100644 index 000000000..f0c55252e --- /dev/null +++ b/mediapipe/modules/pose_landmark/README.md @@ -0,0 +1,9 @@ +# pose_landmark + +Subgraphs|Details +:--- | :--- +[`PoseLandmarkUpperBodyByRoiCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_cpu.pbtxt)| Detects landmarks of a single pose. See landmarks (key points) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg). (CPU input, and inference is executed on CPU.) +[`PoseLandmarkUpperBodyByRoiGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt)| Detects landmarks of a single pose. See landmarks (key points) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg). (GPU input, and inference is executed on GPU) +[`PoseLandmarkUpperBodyCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_cpu.pbtxt)| Detects and tracks landmarks of a single pose. See landmarks (key points) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg). (CPU input, and inference is executed on CPU) +[`PoseLandmarkUpperBodyGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt)| Detects and tracks landmarks of a single pose. See landmarks (key points) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg). (GPU input, and inference is executed on GPU.) + diff --git a/mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt b/mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt new file mode 100644 index 000000000..06b2476ba --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt @@ -0,0 +1,46 @@ +# MediaPipe graph to calculate pose region of interest (ROI) from a detection +# provided by "PoseDetectionCpu" or "PoseDetectionGpu" +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "PoseDetectionToRoi" + +# Pose detection. (Detection) +input_stream: "DETECTION:detection" +# Frame size (width and height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# ROI according to the first detection of input detections. (NormalizedRect) +output_stream: "ROI:roi" + +# Converts pose detection into a rectangle based on center and scale alignment +# points. Pose detection contains four key points: first two for full-body pose +# and two more for upper-body pose. 
+node { + calculator: "AlignmentPointsRectsCalculator" + input_stream: "DETECTION:detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:raw_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 2 + rotation_vector_end_keypoint_index: 3 + rotation_vector_target_angle_degrees: 90 + output_zero_rect_for_empty_detections: true + } + } +} + +# Expands pose rect with marging used during training. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:raw_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite b/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite new file mode 100755 index 000000000..8d36c1b19 Binary files /dev/null and b/mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite differ diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_cpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_cpu.pbtxt new file mode 100644 index 000000000..9b19c4b72 --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_cpu.pbtxt @@ -0,0 +1,192 @@ +# MediaPipe graph to detect/predict upper-body pose landmarks. (CPU input, and +# inference is executed on CPU.) +# +# It is required that "pose_landmark_upper_body.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkUpperBodyByRoiCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "LANDMARKS:landmarks" +# } + +type: "PoseLandmarkUpperBodyByRoiCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a pose is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# Pose landmarks within the given ROI. (NormalizedLandmarkList) +# We have 25 (upper-body) landmarks +# (see pose_landmark_upper_body_topology.svg), and there are other auxiliary key +# points. +# 0 - nose +# 1 - right eye (inner) +# 2 - right eye +# 3 - right eye (outer) +# 4 - left eye (inner) +# 5 - left eye +# 6 - left eye (outer) +# 7 - right ear +# 8 - left ear +# 9 - mouth (right) +# 10 - mouth (left) +# 11 - right shoulder +# 12 - left shoulder +# 13 - right elbow +# 14 - left elbow +# 15 - right wrist +# 16 - left wrist +# 17 - right pinky +# 18 - left pinky +# 19 - right index +# 20 - left index +# 21 - right thumb +# 22 - left thumb +# 23 - right hip +# 24 - left hip +# +# NOTE: if a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:landmarks" + +# Crops the rectangle that contains a pose from the input image. +node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "IMAGE:pose_region" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + border_mode: BORDER_REPLICATE + } + } +} + +# Transforms the input image on CPU to a 256x256 image. 
To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:pose_region" + output_stream: "IMAGE:transformed_pose_region" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 256 + output_height: 256 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on CPU into a tensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_pose_region" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.TfLiteConverterCalculatorOptions.ext] { + zero_center: false + } + } +} + +# Runs a TensorFlow Lite model inference on CPU. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite" + delegate { xnnpack {} } + } + } +} + +# Splits a vector of TFLite tensors to multiple vectors according to the ranges +# specified in option. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "pose_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the pose-flag tensor into a float that represents the confidence +# score of pose presence. +node { + calculator: "TfLiteTensorsToFloatsCalculator" + input_stream: "TENSORS:pose_flag_tensor" + output_stream: "FLOAT:pose_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a pose is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:pose_presence_score" + output_stream: "FLAG:pose_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drop landmarks tensors if pose is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:pose_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:raw_landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 31 + input_image_width: 256 + input_image_height: 256 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed pose +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (pose +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:raw_landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:adjusted_landmarks" +} + +# Projects the landmarks from the cropped pose image to the corresponding +# locations on the full image before cropping (input to the graph). 
+node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:adjusted_landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:landmarks" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_gpu.pbtxt new file mode 100644 index 000000000..6197bdea0 --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_by_roi_gpu.pbtxt @@ -0,0 +1,191 @@ +# MediaPipe graph to detect/predict upper-body pose landmarks. (GPU input, and +# inference is executed on GPU.) +# +# It is required that "pose_landmark_upper_body.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkUpperBodyByRoiGpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "LANDMARKS:landmarks" +# } + +type: "PoseLandmarkUpperBodyByRoiGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a pose is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# Pose landmarks within the given ROI. (NormalizedLandmarkList) +# We have 25 (upper-body) landmarks +# (see pose_landmark_upper_body_topology.svg), and there are other auxiliary key +# points. +# 0 - nose +# 1 - right eye (inner) +# 2 - right eye +# 3 - right eye (outer) +# 4 - left eye (inner) +# 5 - left eye +# 6 - left eye (outer) +# 7 - right ear +# 8 - left ear +# 9 - mouth (right) +# 10 - mouth (left) +# 11 - right shoulder +# 12 - left shoulder +# 13 - right elbow +# 14 - left elbow +# 15 - right wrist +# 16 - left wrist +# 17 - right pinky +# 18 - left pinky +# 19 - right index +# 20 - left index +# 21 - right thumb +# 22 - left thumb +# 23 - right hip +# 24 - left hip +# +# NOTE: if a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:landmarks" + +# Crops the rectangle that contains a pose from the input image. +node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:roi" + output_stream: "IMAGE_GPU:pose_region" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + border_mode: BORDER_REPLICATE + } + } +} + +# Transforms the input image on GPU to a 256x256 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:pose_region" + output_stream: "IMAGE_GPU:transformed_pose_region" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 256 + output_height: 256 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on GPU into a tensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_pose_region" + output_stream: "TENSORS_GPU:input_tensors" + options: { + [mediapipe.TfLiteConverterCalculatorOptions.ext] { + zero_center: false + } + } +} + +# Runs a TensorFlow Lite model inference on GPU. 
+node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:input_tensors" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite" + } + } +} + +# Splits a vector of TFLite tensors to multiple vectors according to the ranges +# specified in option. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "pose_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the pose-flag tensor into a float that represents the confidence +# score of pose presence. +node { + calculator: "TfLiteTensorsToFloatsCalculator" + input_stream: "TENSORS:pose_flag_tensor" + output_stream: "FLOAT:pose_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a pose is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:pose_presence_score" + output_stream: "FLAG:pose_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drop landmarks tensors if pose is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:pose_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:raw_landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 31 + input_image_width: 256 + input_image_height: 256 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed pose +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (pose +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:raw_landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:adjusted_landmarks" +} + +# Projects the landmarks from the cropped pose image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:adjusted_landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:landmarks" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body_cpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_cpu.pbtxt new file mode 100644 index 000000000..2193e3317 --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_cpu.pbtxt @@ -0,0 +1,170 @@ +# MediaPipe graph to detect/predict pose landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip pose detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. 
+# +# It is required that "pose_landmark_upper_body.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkUpperBodyCpu" +# input_stream: "IMAGE:image" +# output_stream: "LANDMARKS:pose_landmarks" +# } + +type: "PoseLandmarkUpperBodyCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Pose landmarks within the given ROI. (NormalizedLandmarkList) +# We have 25 (upper-body) landmarks +# (see pose_landmark_upper_body_topology.svg), and there are other auxiliary key +# points. +# 0 - nose +# 1 - right eye (inner) +# 2 - right eye +# 3 - right eye (outer) +# 4 - left eye (inner) +# 5 - left eye +# 6 - left eye (outer) +# 7 - right ear +# 8 - left ear +# 9 - mouth (right) +# 10 - mouth (left) +# 11 - right shoulder +# 12 - left shoulder +# 13 - right elbow +# 14 - left elbow +# 15 - right wrist +# 16 - left wrist +# 17 - right pinky +# 18 - left pinky +# 19 - right index +# 20 - left index +# 21 - right thumb +# 22 - left thumb +# 23 - right hip +# 24 - left hip +# +# NOTE: if a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:pose_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected poses. (Detection) +output_stream: "DETECTION:pose_detection" +# Regions of interest calculated based on landmarks. (NormalizedRect) +output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks" +# Regions of interest calculated based on pose detections. (NormalizedRect) +output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection" + +# Caches pose rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# pose rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:pose_rect_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks" +} + +# Checks if there's previous pose rect calculatoed from landmarks. +node: { + calculator: "PacketPresenceCalculator" + input_stream: "PACKET:prev_pose_rect_from_landmarks" + output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present" +} + +# Calculates size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Drops the incoming image if PoseLandmarkUpperBodyByRoiCpu was able to identify +# pose presence in the previous image. Otherwise, passes the incoming image +# through to trigger a new round of pose detection in PoseDetectionCpu. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "image_size" + input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present" + output_stream: "image_for_pose_detection" + output_stream: "image_size_for_pose_detection" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects poses. 
+node { + calculator: "PoseDetectionCpu" + input_stream: "IMAGE:image_for_pose_detection" + output_stream: "DETECTIONS:pose_detections" +} + +# Gets the very first detection from "pose_detections" vector. +node { + calculator: "SplitDetectionVectorCalculator" + input_stream: "pose_detections" + output_stream: "pose_detection" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + element_only: true + } + } +} + +# Calculates region of interest based on pose detection, so that can be used +# to detect landmarks. +node { + calculator: "PoseDetectionToRoi" + input_stream: "DETECTION:pose_detection" + input_stream: "IMAGE_SIZE:image_size_for_pose_detection" + output_stream: "ROI:pose_rect_from_detection" +} + +# Selects either pose rect (or ROI) calculated from detection or from previously +# detected landmarks if available (in this case, calculation of pose rect from +# detection is skipped). +node { + calculator: "MergeCalculator" + input_stream: "pose_rect_from_detection" + input_stream: "prev_pose_rect_from_landmarks" + output_stream: "pose_rect" +} + +# Detects pose landmarks within specified region of interest of the image. +node { + calculator: "PoseLandmarkUpperBodyByRoiCpu" + input_stream: "IMAGE:image" + input_stream: "ROI:pose_rect" + output_stream: "LANDMARKS:pose_landmarks" +} + +# Calculates region of interest based on pose landmarks, so that can be reused +# for subsequent image. +node { + calculator: "PoseLandmarkUpperBodyLandmarksToRoi" + input_stream: "LANDMARKS:pose_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:pose_rect_from_landmarks" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt new file mode 100644 index 000000000..5666b2d9e --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_gpu.pbtxt @@ -0,0 +1,170 @@ +# MediaPipe graph to detect/predict pose landmarks. (GPU input, and inference is +# executed on GPU.) This graph tries to skip pose detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. +# +# It is required that "pose_landmark_upper_body.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_upper_body.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkUpperBodyGpu" +# input_stream: "IMAGE:image" +# output_stream: "LANDMARKS:pose_landmarks" +# } + +type: "PoseLandmarkUpperBodyGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Pose landmarks within the given ROI. (NormalizedLandmarkList) +# We have 25 (upper-body) landmarks +# (see pose_landmark_upper_body_topology.svg), and there are other auxiliary key +# points. 
+# 0 - nose +# 1 - right eye (inner) +# 2 - right eye +# 3 - right eye (outer) +# 4 - left eye (inner) +# 5 - left eye +# 6 - left eye (outer) +# 7 - right ear +# 8 - left ear +# 9 - mouth (right) +# 10 - mouth (left) +# 11 - right shoulder +# 12 - left shoulder +# 13 - right elbow +# 14 - left elbow +# 15 - right wrist +# 16 - left wrist +# 17 - right pinky +# 18 - left pinky +# 19 - right index +# 20 - left index +# 21 - right thumb +# 22 - left thumb +# 23 - right hip +# 24 - left hip +# +# NOTE: if a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:pose_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected poses. (Detection) +output_stream: "DETECTION:pose_detection" +# Regions of interest calculated based on landmarks. (NormalizedRect) +output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks" +# Regions of interest calculated based on pose detections. (NormalizedRect) +output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection" + +# Caches pose rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# pose rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:pose_rect_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks" +} + +# Checks if there's previous pose rect calculatoed from landmarks. +node: { + calculator: "PacketPresenceCalculator" + input_stream: "PACKET:prev_pose_rect_from_landmarks" + output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present" +} + +# Calculates size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Drops the incoming image if PoseLandmarkUpperBodyByRoiGpu was able to identify +# pose presence in the previous image. Otherwise, passes the incoming image +# through to trigger a new round of pose detection in PoseDetectionGpu. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "image_size" + input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present" + output_stream: "image_for_pose_detection" + output_stream: "image_size_for_pose_detection" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects poses. +node { + calculator: "PoseDetectionGpu" + input_stream: "IMAGE:image_for_pose_detection" + output_stream: "DETECTIONS:pose_detections" +} + +# Gets the very first detection from "pose_detections" vector. +node { + calculator: "SplitDetectionVectorCalculator" + input_stream: "pose_detections" + output_stream: "pose_detection" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + element_only: true + } + } +} + +# Calculates region of interest based on pose detection, so that can be used +# to detect landmarks. 
+node { + calculator: "PoseDetectionToRoi" + input_stream: "DETECTION:pose_detection" + input_stream: "IMAGE_SIZE:image_size_for_pose_detection" + output_stream: "ROI:pose_rect_from_detection" +} + +# Selects either pose rect (or ROI) calculated from detection or from previously +# detected landmarks if available (in this case, calculation of pose rect from +# detection is skipped). +node { + calculator: "MergeCalculator" + input_stream: "pose_rect_from_detection" + input_stream: "prev_pose_rect_from_landmarks" + output_stream: "pose_rect" +} + +# Detects pose landmarks within specified region of interest of the image. +node { + calculator: "PoseLandmarkUpperBodyByRoiGpu" + input_stream: "IMAGE:image" + input_stream: "ROI:pose_rect" + output_stream: "LANDMARKS:pose_landmarks" +} + +# Calculates region of interest based on pose landmarks, so that can be reused +# for subsequent image. +node { + calculator: "PoseLandmarkUpperBodyLandmarksToRoi" + input_stream: "LANDMARKS:pose_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:pose_rect_from_landmarks" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body_landmarks_to_roi.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_landmarks_to_roi.pbtxt new file mode 100644 index 000000000..987de9958 --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_landmarks_to_roi.pbtxt @@ -0,0 +1,64 @@ +# MediaPipe graph to calculate pose region of interest (ROI) from landmarks +# detected by "PoseLandmarkUpperBodyByRoiCpu" or +# "PoseLandmarkUpperBodyByRoiGpu". +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "PoseLandmarkUpperBodyLandmarksToRoi" + +# Normalized landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:landmarks" +# Frame size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# ROI according to landmarks. (NormalizedRect) +output_stream: "ROI:roi" + +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks" + output_stream: "alignment_landmarks" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 25 end: 27 } + } + } +} + +# Converts landmarks to a detection that tightly encloses all landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:alignment_landmarks" + output_stream: "DETECTION:detection" +} + +# Converts detection into a rectangle based on center and scale alignment +# points. +node { + calculator: "AlignmentPointsRectsCalculator" + input_stream: "DETECTION:detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:raw_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 + rotation_vector_end_keypoint_index: 1 + rotation_vector_target_angle_degrees: 90 + output_zero_rect_for_empty_detections: true + } + } +} + +# Expands pose rect with marging used during training. 
+node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:raw_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg new file mode 100644 index 000000000..09373251f --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_upper_body_topology.svg @@ -0,0 +1,514 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 3 + 2 + 1 + 4 + 5 + 6 + 0 + 7 + 8 + 9 + 10 + 11 + 12 + 22 + 21 + 19 + 17 + 15 + 13 + 4 + 16 + 18 + 20 + 24 + 23 + + diff --git a/mediapipe/python/BUILD b/mediapipe/python/BUILD new file mode 100644 index 000000000..bf8d84317 --- /dev/null +++ b/mediapipe/python/BUILD @@ -0,0 +1,50 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "builtin_calculators", + deps = [ + "//mediapipe/calculators/core:pass_through_calculator", + "//mediapipe/graphs/pose_tracking:upper_body_pose_tracking_cpu_deps", + ], +) + +pybind_extension( + name = "_framework_bindings", + srcs = ["framework_bindings.cc"], + linkopts = [ + "-lopencv_core", + "-lopencv_imgproc", + "-lopencv_highgui", + "-lopencv_video", + "-lopencv_features2d", + "-lopencv_calib3d", + "-lopencv_imgcodecs", + ], + deps = [ + ":builtin_calculators", + "//mediapipe/python/pybind:calculator_graph", + "//mediapipe/python/pybind:image_frame", + "//mediapipe/python/pybind:matrix", + "//mediapipe/python/pybind:packet", + "//mediapipe/python/pybind:packet_creator", + "//mediapipe/python/pybind:packet_getter", + "//mediapipe/python/pybind:resource_util", + "//mediapipe/python/pybind:timestamp", + ], +) diff --git a/mediapipe/python/__init__.py b/mediapipe/python/__init__.py new file mode 100644 index 000000000..197288743 --- /dev/null +++ b/mediapipe/python/__init__.py @@ -0,0 +1,26 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
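Together with the pybind extension above (which links "upper_body_pose_tracking_cpu_deps" into "_framework_bindings"), the package defined here is enough to drive the new upper-body pose graphs from Python. The following is only a rough sketch, not part of this change, and assumes the "pose_detection.tflite" and "pose_landmark_upper_body.tflite" models are resolvable at the module paths noted earlier and that the landmark message type is known to packet_getter.get_proto:

import numpy as np
import mediapipe.python as mp

# Hypothetical consumer graph: wraps the "PoseLandmarkUpperBodyCpu" subgraph
# registered by mediapipe/modules/pose_landmark/BUILD.
POSE_GRAPH = """
  input_stream: 'input_image'
  output_stream: 'pose_landmarks'
  node {
    calculator: 'PoseLandmarkUpperBodyCpu'
    input_stream: 'IMAGE:input_image'
    output_stream: 'LANDMARKS:pose_landmarks'
  }
"""

landmark_packets = []
graph = mp.CalculatorGraph(graph_config=POSE_GRAPH)
graph.observe_output_stream(
    'pose_landmarks', lambda _, packet: landmark_packets.append(packet))
graph.start_run()

# Feed a single SRGB frame; real code would loop over frames with increasing
# timestamps. A landmark packet is only emitted when a pose is actually found.
frame = np.zeros((480, 640, 3), dtype=np.uint8)
graph.add_packet_to_input_stream(
    stream='input_image',
    packet=mp.packet_creator.create_image_frame(
        image_format=mp.ImageFormat.SRGB, data=frame),
    timestamp=0)
graph.close()

for packet in landmark_packets:
  landmarks = mp.packet_getter.get_proto(packet)  # NormalizedLandmarkList
  print(packet.timestamp, landmarks)

Note that the BUILD rule above only links the CPU pose tracking calculators into the bindings, so the GPU variants of these graphs are not reachable from Python in this change.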
+ +"""MediaPipe Python API.""" + +from mediapipe.python._framework_bindings import resource_util +from mediapipe.python._framework_bindings.calculator_graph import CalculatorGraph +from mediapipe.python._framework_bindings.calculator_graph import GraphInputStreamAddMode +from mediapipe.python._framework_bindings.image_frame import ImageFormat +from mediapipe.python._framework_bindings.image_frame import ImageFrame +from mediapipe.python._framework_bindings.matrix import Matrix +from mediapipe.python._framework_bindings.packet import Packet +from mediapipe.python._framework_bindings.timestamp import Timestamp +import mediapipe.python.packet_creator +import mediapipe.python.packet_getter diff --git a/mediapipe/python/calculator_graph_test.py b/mediapipe/python/calculator_graph_test.py new file mode 100644 index 000000000..601f89a98 --- /dev/null +++ b/mediapipe/python/calculator_graph_test.py @@ -0,0 +1,178 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for mediapipe.python._framework_bindings.calculator_graph.""" + +# Dependency imports + +from absl.testing import absltest +import mediapipe.python as mp +from google.protobuf import text_format +from mediapipe.framework import calculator_pb2 + + +class GraphTest(absltest.TestCase): + + def testInvalidBinaryGraphFile(self): + with self.assertRaisesRegex(FileNotFoundError, 'No such file or directory'): + mp.CalculatorGraph(binary_graph_path='/tmp/abc.binarypb') + + def testInvalidNodeConfig(self): + text_config = """ + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + input_stream: 'in' + output_stream: 'out' + } + """ + config_proto = calculator_pb2.CalculatorGraphConfig() + text_format.Parse(text_config, config_proto) + with self.assertRaisesRegex( + ValueError, + 'Input and output streams to PassThroughCalculator must use matching tags and indexes.' 
+ ): + mp.CalculatorGraph(graph_config=config_proto) + + def testInvalidCalculatorType(self): + text_config = """ + node { + calculator: 'SomeUnknownCalculator' + input_stream: 'in' + output_stream: 'out' + } + """ + config_proto = calculator_pb2.CalculatorGraphConfig() + text_format.Parse(text_config, config_proto) + with self.assertRaisesRegex( + RuntimeError, 'Unable to find Calculator \"SomeUnknownCalculator\"'): + mp.CalculatorGraph(graph_config=config_proto) + + def testGraphInitializedWithProtoConfig(self): + text_config = """ + max_queue_size: 1 + input_stream: 'in' + output_stream: 'out' + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + output_stream: 'out' + } + """ + config_proto = calculator_pb2.CalculatorGraphConfig() + text_format.Parse(text_config, config_proto) + graph = mp.CalculatorGraph(graph_config=config_proto) + + hello_world_packet = mp.packet_creator.create_string('hello world') + out = [] + graph = mp.CalculatorGraph(graph_config=config_proto) + graph.observe_output_stream('out', lambda _, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=hello_world_packet, timestamp=0) + graph.add_packet_to_input_stream( + stream='in', packet=hello_world_packet.at(1)) + graph.close() + self.assertEqual(graph.graph_input_stream_add_mode, + mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL) + self.assertEqual(graph.max_queue_size, 1) + self.assertFalse(graph.has_error()) + self.assertLen(out, 2) + self.assertEqual(out[0].timestamp, 0) + self.assertEqual(out[1].timestamp, 1) + self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world') + self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world') + + def testGraphInitializedWithTextConfig(self): + text_config = """ + max_queue_size: 1 + input_stream: 'in' + output_stream: 'out' + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + output_stream: 'out' + } + """ + + hello_world_packet = mp.packet_creator.create_string('hello world') + out = [] + graph = mp.CalculatorGraph(graph_config=text_config) + graph.observe_output_stream('out', lambda _, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=hello_world_packet.at(0)) + graph.add_packet_to_input_stream( + stream='in', packet=hello_world_packet, timestamp=1) + graph.close() + self.assertEqual(graph.graph_input_stream_add_mode, + mp.GraphInputStreamAddMode.WAIT_TILL_NOT_FULL) + self.assertEqual(graph.max_queue_size, 1) + self.assertFalse(graph.has_error()) + self.assertLen(out, 2) + self.assertEqual(out[0].timestamp, 0) + self.assertEqual(out[1].timestamp, 1) + self.assertEqual(mp.packet_getter.get_str(out[0]), 'hello world') + self.assertEqual(mp.packet_getter.get_str(out[1]), 'hello world') + + def testInsertPacketsWithSameTimestamp(self): + text_config = """ + max_queue_size: 1 + input_stream: 'in' + output_stream: 'out' + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + output_stream: 'out' + } + """ + config_proto = calculator_pb2.CalculatorGraphConfig() + text_format.Parse(text_config, config_proto) + + hello_world_packet = mp.packet_creator.create_string('hello world') + out = [] + graph = mp.CalculatorGraph(graph_config=config_proto) + graph.observe_output_stream('out', lambda _, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=hello_world_packet.at(0)) + graph.wait_until_idle() + graph.add_packet_to_input_stream( + stream='in', 
packet=hello_world_packet.at(0)) + with self.assertRaisesRegex( + ValueError, 'Current minimum expected timestamp is 1 but received 0.'): + graph.wait_until_idle() + + def testSidePacketGraph(self): + text_config = """ + node { + calculator: 'StringToUint64Calculator' + input_side_packet: "string" + output_side_packet: "number" + } + """ + config_proto = calculator_pb2.CalculatorGraphConfig() + text_format.Parse(text_config, config_proto) + graph = mp.CalculatorGraph(graph_config=config_proto) + graph.start_run( + input_side_packets={'string': mp.packet_creator.create_string('42')}) + graph.wait_until_done() + self.assertFalse(graph.has_error()) + self.assertEqual( + mp.packet_getter.get_uint(graph.get_output_side_packet('number')), 42) + + +if __name__ == '__main__': + absltest.main() diff --git a/mediapipe/python/framework_bindings.cc b/mediapipe/python/framework_bindings.cc new file mode 100644 index 000000000..9bb3571f0 --- /dev/null +++ b/mediapipe/python/framework_bindings.cc @@ -0,0 +1,39 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/calculator_graph.h" +#include "mediapipe/python/pybind/image_frame.h" +#include "mediapipe/python/pybind/matrix.h" +#include "mediapipe/python/pybind/packet.h" +#include "mediapipe/python/pybind/packet_creator.h" +#include "mediapipe/python/pybind/packet_getter.h" +#include "mediapipe/python/pybind/resource_util.h" +#include "mediapipe/python/pybind/timestamp.h" + +namespace mediapipe { +namespace python { + +PYBIND11_MODULE(_framework_bindings, m) { + ResourceUtilSubmodule(&m); + ImageFrameSubmodule(&m); + MatrixSubmodule(&m); + TimestampSubmodule(&m); + PacketSubmodule(&m); + PacketCreatorSubmodule(&m); + PacketGetterSubmodule(&m); + CalculatorGraphSubmodule(&m); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/image_frame_test.py b/mediapipe/python/image_frame_test.py new file mode 100644 index 000000000..8e70fd73f --- /dev/null +++ b/mediapipe/python/image_frame_test.py @@ -0,0 +1,145 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for mediapipe.python._framework_bindings.image_frame.""" + +import random +from absl.testing import absltest +import cv2 +import numpy as np +import PIL.Image +import mediapipe.python as mp + + +# TODO: Add unit tests specifically for memory management. 
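The cases below all revolve around the same round trip: an ImageFrame is constructed from a NumPy array plus an ImageFormat, and numpy_view() hands the pixels back as a read-only array that must be copied before modification. A minimal sketch with arbitrary dimensions:

import numpy as np
import mediapipe.python as mp

rgb = np.random.randint(255, size=(240, 320, 3), dtype=np.uint8)
frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=rgb)

view = frame.numpy_view()   # read-only reference to the internal pixel data
assert np.array_equal(rgb, view)

writable = np.copy(view)    # copy first if the pixels need to be modified
writable[0, 0, 0] = 0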
+class ImageFrameTest(absltest.TestCase): + + def testCreateImageFrameFromGrayCvMat(self): + w, h = random.randrange(3, 100), random.randrange(3, 100) + mat = cv2.cvtColor( + np.random.randint(2**8 - 1, size=(h, w, 3), dtype=np.uint8), + cv2.COLOR_RGB2GRAY) + mat[2, 2] = 42 + image_frame = mp.ImageFrame(image_format=mp.ImageFormat.GRAY8, data=mat) + self.assertTrue(np.array_equal(mat, image_frame.numpy_view())) + with self.assertRaisesRegex(IndexError, 'index dimension mismatch'): + print(image_frame[w, h, 1]) + with self.assertRaisesRegex(IndexError, 'out of bounds'): + print(image_frame[w, h]) + self.assertEqual(42, image_frame[2, 2]) + + def testCreateImageFrameFromRgbCvMat(self): + w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3 + mat = cv2.cvtColor( + np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8), + cv2.COLOR_RGB2BGR) + mat[2, 2, 1] = 42 + image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat) + self.assertTrue(np.array_equal(mat, image_frame.numpy_view())) + with self.assertRaisesRegex(IndexError, 'out of bounds'): + print(image_frame[w, h, channels]) + self.assertEqual(42, image_frame[2, 2, 1]) + + def testCreateImageFrameFromRgb48CvMat(self): + w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3 + mat = cv2.cvtColor( + np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16), + cv2.COLOR_RGB2BGR) + mat[2, 2, 1] = 42 + image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB48, data=mat) + self.assertTrue(np.array_equal(mat, image_frame.numpy_view())) + with self.assertRaisesRegex(IndexError, 'out of bounds'): + print(image_frame[w, h, channels]) + self.assertEqual(42, image_frame[2, 2, 1]) + + def testCreateImageFrameFromGrayPilImage(self): + w, h = random.randrange(3, 100), random.randrange(3, 100) + img = PIL.Image.fromarray( + np.random.randint(2**8 - 1, size=(h, w), dtype=np.uint8), 'L') + image_frame = mp.ImageFrame( + image_format=mp.ImageFormat.GRAY8, data=np.asarray(img)) + self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view())) + with self.assertRaisesRegex(IndexError, 'index dimension mismatch'): + print(image_frame[w, h, 1]) + with self.assertRaisesRegex(IndexError, 'out of bounds'): + print(image_frame[w, h]) + + def testCreateImageFrameFromRgbPilImage(self): + w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3 + img = PIL.Image.fromarray( + np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8), + 'RGB') + image_frame = mp.ImageFrame( + image_format=mp.ImageFormat.SRGB, data=np.asarray(img)) + self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view())) + with self.assertRaisesRegex(IndexError, 'out of bounds'): + print(image_frame[w, h, channels]) + + def testCreateImageFrameFromRgba64PilImage(self): + w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 4 + img = PIL.Image.fromarray( + np.random.randint(2**16 - 1, size=(h, w, channels), dtype=np.uint16), + 'RGBA') + image_frame = mp.ImageFrame( + image_format=mp.ImageFormat.SRGBA64, + data=np.asarray(img, dtype=np.uint16)) + self.assertTrue(np.array_equal(np.asarray(img), image_frame.numpy_view())) + with self.assertRaisesRegex(IndexError, 'out of bounds'): + print(image_frame[1000, 1000, 1000]) + + def testImageFrameNumbyView(self): + w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3 + mat = cv2.cvtColor( + np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8), + cv2.COLOR_RGB2BGR) + image_frame = 
mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=mat) + output_ndarray = image_frame.numpy_view() + self.assertTrue(np.array_equal(mat, image_frame.numpy_view())) + # The output of numpy_view() is a reference to the internal data and it's + # unwritable after creation. + with self.assertRaisesRegex(ValueError, + 'assignment destination is read-only'): + output_ndarray[0, 0, 0] = 0 + copied_ndarray = np.copy(output_ndarray) + copied_ndarray[0, 0, 0] = 0 + + def testCroppedGray8Image(self): + w, h = random.randrange(20, 100), random.randrange(20, 100) + channels, offset = 3, 10 + mat = cv2.cvtColor( + np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8), + cv2.COLOR_RGB2GRAY) + image_frame = mp.ImageFrame( + image_format=mp.ImageFormat.GRAY8, + data=mat[offset:-offset, offset:-offset]) + self.assertTrue( + np.array_equal(mat[offset:-offset, offset:-offset], + image_frame.numpy_view())) + + def testCroppedRGBImage(self): + w, h = random.randrange(20, 100), random.randrange(20, 100) + channels, offset = 3, 10 + mat = cv2.cvtColor( + np.random.randint(2**8 - 1, size=(h, w, channels), dtype=np.uint8), + cv2.COLOR_RGB2BGR) + image_frame = mp.ImageFrame( + image_format=mp.ImageFormat.SRGB, + data=mat[offset:-offset, offset:-offset, :]) + self.assertTrue( + np.array_equal(mat[offset:-offset, offset:-offset, :], + image_frame.numpy_view())) + + +if __name__ == '__main__': + absltest.main() diff --git a/mediapipe/python/packet_creator.py b/mediapipe/python/packet_creator.py new file mode 100644 index 000000000..c904dd780 --- /dev/null +++ b/mediapipe/python/packet_creator.py @@ -0,0 +1,126 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
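The creator functions re-exported below produce typed packets; before a packet is fed to a graph it is usually bound to a timestamp, either in place or through Packet.at() (the graph tests above use both styles, alongside the timestamp argument of add_packet_to_input_stream). A small sketch:

import mediapipe.python as mp

p = mp.packet_creator.create_string('hello world')
stamped = p.at(7)                # packet carrying the payload at timestamp 7
assert mp.packet_getter.get_str(stamped) == 'hello world'
assert stamped.timestamp == 7

q = mp.packet_creator.create_int(42)
q.timestamp = 0                  # or assign the timestamp in place
assert mp.packet_getter.get_int(q) == 42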
+ +# Lint as: python3 +"""The public facing packet creator APIs.""" + +from typing import List, Union + +import numpy as np + +from google.protobuf import message +from mediapipe.python._framework_bindings import _packet_creator +from mediapipe.python._framework_bindings import image_frame +from mediapipe.python._framework_bindings import packet + + +create_string = _packet_creator.create_string +create_bool = _packet_creator.create_bool +create_int = _packet_creator.create_int +create_int8 = _packet_creator.create_int8 +create_int16 = _packet_creator.create_int16 +create_int32 = _packet_creator.create_int32 +create_int64 = _packet_creator.create_int64 +create_uint8 = _packet_creator.create_uint8 +create_uint16 = _packet_creator.create_uint16 +create_uint32 = _packet_creator.create_uint32 +create_uint64 = _packet_creator.create_uint64 +create_float = _packet_creator.create_float +create_double = _packet_creator.create_double +create_int_array = _packet_creator.create_int_array +create_float_array = _packet_creator.create_float_array +create_int_vector = _packet_creator.create_int_vector +create_float_vector = _packet_creator.create_float_vector +create_string_vector = _packet_creator.create_string_vector +create_packet_vector = _packet_creator.create_packet_vector +create_string_to_packet_map = _packet_creator.create_string_to_packet_map +create_matrix = _packet_creator.create_matrix + + +def create_image_frame( + data: Union[image_frame.ImageFrame, np.ndarray], + *, + image_format: image_frame.ImageFormat = None) -> packet.Packet: + """Create a MediaPipe ImageFrame packet. + + A MediaPipe ImageFrame packet can be created from either the raw pixel data + represented as a numpy array with one of the uint8, uint16, and float data + types or an existing MediaPipe ImageFrame object. The data will be realigned + and copied into an ImageFrame object inside of the packet. + + Args: + data: A MediaPipe ImageFrame object or the raw pixel data that is + represnted as a numpy ndarray. + image_format: One of the image_frame.ImageFormat enum types. + + Returns: + A MediaPipe ImageFrame Packet. + + Raises: + ValueError: + i) When "data" is a numpy ndarray, "image_format" is not provided. + ii) When "data" is an ImageFrame object, the "image_format" arg doesn't + match the image format of the "data" ImageFrame object. + TypeError: If "image format" doesn't match "data" array's data type. + + Examples: + np_array = np.random.randint(255, size=(321, 123, 3), dtype=np.uint8) + image_frame_packet = mp.packet_creator.create_image_frame( + image_format=mp.ImageFormat.SRGB, data=np_array) + + image_frame = mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=np_array) + image_frame_packet = mp.packet_creator.create_image_frame(image_frame) + + """ + if isinstance(data, image_frame.ImageFrame): + if image_format is not None and data.image_format != image_format: + raise ValueError( + 'The provided image_format doesn\'t match the one from the data arg.') + # pylint:disable=protected-access + return _packet_creator._create_image_frame_with_copy(data) + # pylint:enable=protected-access + else: + if image_format is None: + raise ValueError('Please provide \'image_format\' with \'data\'.') + # pylint:disable=protected-access + return _packet_creator._create_image_frame_with_copy(image_format, data) + # pylint:enable=protected-access + + +def create_proto(proto_message: message.Message) -> packet.Packet: + """Create a MediaPipe protobuf message packet. + + Args: + proto_message: A Python protobuf message. 
+ + Returns: + A MediaPipe protobuf message Packet. + + Raises: + RuntimeError: If the protobuf message type is not registered in MediaPipe. + + Examples: + detection = detection_pb2.Detection() + text_format.Parse('score: 0.5', detection) + packet = mp.packet_creator.create_proto(detection) + output_detection = mp.packet_getter.get_proto(packet) + """ + # pylint:disable=protected-access + return _packet_creator._create_proto(proto_message.DESCRIPTOR.full_name, + proto_message.SerializeToString()) + # pylint:enable=protected-access + + +def create_proto_vector(message_list: List[message.Message]) -> packet.Packet: + raise NotImplementedError('create_proto_vector is not implemented.') diff --git a/mediapipe/python/packet_getter.py b/mediapipe/python/packet_getter.py new file mode 100644 index 000000000..1822890e7 --- /dev/null +++ b/mediapipe/python/packet_getter.py @@ -0,0 +1,117 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""The public facing packet getter APIs.""" + +from typing import List, Type + +from google.protobuf import message +from google.protobuf import symbol_database +from mediapipe.python._framework_bindings import _packet_getter +from mediapipe.python._framework_bindings import packet as mp_packet + +get_str = _packet_getter.get_str +get_bytes = _packet_getter.get_bytes +get_bool = _packet_getter.get_bool +get_int = _packet_getter.get_int +get_uint = _packet_getter.get_uint +get_float = _packet_getter.get_float +get_int_list = _packet_getter.get_int_list +get_float_list = _packet_getter.get_float_list +get_str_list = _packet_getter.get_str_list +get_packet_list = _packet_getter.get_packet_list +get_str_to_packet_dict = _packet_getter.get_str_to_packet_dict +get_image_frame = _packet_getter.get_image_frame +get_matrix = _packet_getter.get_matrix + + +def get_proto(packet: mp_packet.Packet) -> Type[message.Message]: + """Get the content of a MediaPipe proto Packet as a proto message. + + Args: + packet: A MediaPipe proto Packet. + + Returns: + A proto message. + + Raises: + TypeError: If the message descriptor can't be found by type name. 
+ + Examples: + detection = detection_pb2.Detection() + text_format.Parse('score: 0.5', detection) + proto_packet = mp.packet_creator.create_proto(detection) + output_proto = mp.packet_getter.get_proto(proto_packet) + """ + # pylint:disable=protected-access + proto_type_name = _packet_getter._get_proto_type_name(packet) + # pylint:enable=protected-access + try: + descriptor = symbol_database.Default().pool.FindMessageTypeByName( + proto_type_name) + except KeyError: + raise TypeError('Can not find message descriptor by type name: %s' % + proto_type_name) + + message_class = symbol_database.Default().GetPrototype(descriptor) + # pylint:disable=protected-access + serialized_proto = _packet_getter._get_serialized_proto(packet) + # pylint:enable=protected-access + proto_message = message_class() + proto_message.ParseFromString(serialized_proto) + return proto_message + + +def get_proto_list(packet: mp_packet.Packet) -> List[message.Message]: + """Get the content of a MediaPipe proto vector Packet as a proto message list. + + Args: + packet: A MediaPipe proto vector Packet. + + Returns: + A proto message list. + + Raises: + TypeError: If the message descriptor can't be found by type name. + + Examples: + proto_list = mp.packet_getter.get_proto_list(protos_packet) + """ + # pylint:disable=protected-access + vector_size = _packet_getter._get_proto_vector_size(packet) + # pylint:enable=protected-access + # Return empty list if the proto vector is empty. + if vector_size == 0: + return [] + + # pylint:disable=protected-access + proto_type_name = _packet_getter._get_proto_vector_element_type_name(packet) + # pylint:enable=protected-access + try: + descriptor = symbol_database.Default().pool.FindMessageTypeByName( + proto_type_name) + except KeyError: + raise TypeError('Can not find message descriptor by type name: %s' % + proto_type_name) + message_class = symbol_database.Default().GetPrototype(descriptor) + # pylint:disable=protected-access + serialized_protos = _packet_getter._get_serialized_proto_list(packet) + # pylint:enable=protected-access + proto_message_list = [] + for serialized_proto in serialized_protos: + proto_message = message_class() + proto_message.ParseFromString(serialized_proto) + proto_message_list.append(proto_message) + return proto_message_list diff --git a/mediapipe/python/packet_test.py b/mediapipe/python/packet_test.py new file mode 100644 index 000000000..a987f470d --- /dev/null +++ b/mediapipe/python/packet_test.py @@ -0,0 +1,349 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
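The proto getters above resolve the Python message class at runtime: create_proto() stores the message's full type name together with its serialized bytes, and get_proto() looks that name up in the default protobuf symbol database before parsing the bytes back into a fresh message. A minimal round-trip sketch, assuming the mediapipe.python package is importable as `mp` and the Detection proto from the docstring examples is available:

import mediapipe.python as mp
from google.protobuf import text_format
from mediapipe.framework.formats import detection_pb2

detection = detection_pb2.Detection()
text_format.Parse('score: 0.5 label: "face"', detection)

# The packet carries the full type name 'mediapipe.Detection' plus the
# serialized bytes; get_proto() reconstructs an equivalent, independent
# message object from them.
proto_packet = mp.packet_creator.create_proto(detection)
restored = mp.packet_getter.get_proto(proto_packet)
print(restored.label[0], restored.score[0])  # face 0.5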
+ +"""Tests for mediapipe.python._framework_bindings.packet.""" + +import gc +import random +import sys +from absl.testing import absltest +import numpy as np +import mediapipe.python as mp +from google.protobuf import text_format +from mediapipe.framework.formats import detection_pb2 + + +class PacketTest(absltest.TestCase): + + def testEmptyPacket(self): + p = mp.Packet() + self.assertTrue(p.is_empty()) + + def testBooleanPacket(self): + p = mp.packet_creator.create_bool(True) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_bool(p), True) + self.assertEqual(p.timestamp, 0) + + def testIntPacket(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_int(2**32) + p = mp.packet_creator.create_int(42) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p), 42) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_int(np.intc(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testInt8Packet(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_int8(2**7) + p = mp.packet_creator.create_int8(2**7 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p), 2**7 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_int8(np.int8(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testInt16Packet(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_int16(2**15) + p = mp.packet_creator.create_int16(2**15 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p), 2**15 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_int16(np.int16(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testInt32Packet(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_int32(2**31) + + p = mp.packet_creator.create_int32(2**31 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p), 2**31 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_int32(np.int32(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testInt64Packet(self): + p = mp.packet_creator.create_int64(2**63 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p), 2**63 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_int64(np.int64(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_int(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testUint8Packet(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_uint8(2**8) + p = mp.packet_creator.create_uint8(2**8 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p), 2**8 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_uint8(np.uint8(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testUint16Packet(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_uint16(2**16) + p = mp.packet_creator.create_uint16(2**16 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p), 2**16 - 1) + 
self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_uint16(np.uint16(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testUint32Packet(self): + with self.assertRaisesRegex(OverflowError, 'execeeds the maximum value'): + p = mp.packet_creator.create_uint32(2**32) + p = mp.packet_creator.create_uint32(2**32 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p), 2**32 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_uint32(np.uint32(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testUint64Packet(self): + p = mp.packet_creator.create_uint64(2**64 - 1) + p.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p), 2**64 - 1) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_uint64(np.uint64(1)) + p2.timestamp = 0 + self.assertEqual(mp.packet_getter.get_uint(p2), 1) + self.assertEqual(p2.timestamp, 0) + + def testFloatPacket(self): + p = mp.packet_creator.create_float(0.42) + p.timestamp = 0 + self.assertAlmostEqual(mp.packet_getter.get_float(p), 0.42) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_float(np.float(0.42)) + p2.timestamp = 0 + self.assertAlmostEqual(mp.packet_getter.get_float(p2), 0.42) + self.assertEqual(p2.timestamp, 0) + + def testDoublePacket(self): + p = mp.packet_creator.create_double(0.42) + p.timestamp = 0 + self.assertAlmostEqual(mp.packet_getter.get_float(p), 0.42) + self.assertEqual(p.timestamp, 0) + p2 = mp.packet_creator.create_double(np.double(0.42)) + p2.timestamp = 0 + self.assertAlmostEqual(mp.packet_getter.get_float(p2), 0.42) + self.assertEqual(p2.timestamp, 0) + + def testDetectionProtoPacket(self): + detection = detection_pb2.Detection() + text_format.Parse('score: 0.5', detection) + p = mp.packet_creator.create_proto(detection).at(100) + + def testStringPacket(self): + p = mp.packet_creator.create_string('abc').at(100) + self.assertEqual(mp.packet_getter.get_str(p), 'abc') + self.assertEqual(p.timestamp, 100) + p.timestamp = 200 + self.assertEqual(p.timestamp, 200) + + def testBytesPacket(self): + p = mp.packet_creator.create_string(b'xd0\xba\xd0').at(300) + self.assertEqual(mp.packet_getter.get_bytes(p), b'xd0\xba\xd0') + self.assertEqual(p.timestamp, 300) + + def testIntArrayPacket(self): + p = mp.packet_creator.create_int_array([1, 2, 3]).at(100) + self.assertEqual(p.timestamp, 100) + + def testFloatArrayPacket(self): + p = mp.packet_creator.create_float_array([0.1, 0.2, 0.3]).at(100) + self.assertEqual(p.timestamp, 100) + + def testIntVectorPacket(self): + p = mp.packet_creator.create_int_vector([1, 2, 3]).at(100) + self.assertEqual(mp.packet_getter.get_int_list(p), [1, 2, 3]) + self.assertEqual(p.timestamp, 100) + + def testFloatVectorPacket(self): + p = mp.packet_creator.create_float_vector([0.1, 0.2, 0.3]).at(100) + output_list = mp.packet_getter.get_float_list(p) + self.assertAlmostEqual(output_list[0], 0.1) + self.assertAlmostEqual(output_list[1], 0.2) + self.assertAlmostEqual(output_list[2], 0.3) + self.assertEqual(p.timestamp, 100) + + def testStringVectorPacket(self): + p = mp.packet_creator.create_string_vector(['a', 'b', 'c']).at(100) + output_list = mp.packet_getter.get_str_list(p) + self.assertEqual(output_list[0], 'a') + self.assertEqual(output_list[1], 'b') + self.assertEqual(output_list[2], 'c') + self.assertEqual(p.timestamp, 100) + + def testPacketVectorPacket(self): + p = 
mp.packet_creator.create_packet_vector([ + mp.packet_creator.create_float(0.42), + mp.packet_creator.create_int(42), + mp.packet_creator.create_string('42') + ]).at(100) + output_list = mp.packet_getter.get_packet_list(p) + self.assertAlmostEqual(mp.packet_getter.get_float(output_list[0]), 0.42) + self.assertEqual(mp.packet_getter.get_int(output_list[1]), 42) + self.assertEqual(mp.packet_getter.get_str(output_list[2]), '42') + self.assertEqual(p.timestamp, 100) + + def testStringToPacketMapPacket(self): + p = mp.packet_creator.create_string_to_packet_map({ + 'float': mp.packet_creator.create_float(0.42), + 'int': mp.packet_creator.create_int(42), + 'string': mp.packet_creator.create_string('42') + }).at(100) + output_list = mp.packet_getter.get_str_to_packet_dict(p) + self.assertAlmostEqual( + mp.packet_getter.get_float(output_list['float']), 0.42) + self.assertEqual(mp.packet_getter.get_int(output_list['int']), 42) + self.assertEqual(mp.packet_getter.get_str(output_list['string']), '42') + self.assertEqual(p.timestamp, 100) + + def testUint8ImageFramePacket(self): + uint8_img = np.random.randint( + 2**8 - 1, + size=(random.randrange(3, 100), random.randrange(3, 100), 3), + dtype=np.uint8) + p = mp.packet_creator.create_image_frame( + mp.ImageFrame(image_format=mp.ImageFormat.SRGB, data=uint8_img)) + output_image_frame = mp.packet_getter.get_image_frame(p) + self.assertTrue(np.array_equal(output_image_frame.numpy_view(), uint8_img)) + + def testUint16ImageFramePacket(self): + uint16_img = np.random.randint( + 2**16 - 1, + size=(random.randrange(3, 100), random.randrange(3, 100), 4), + dtype=np.uint16) + p = mp.packet_creator.create_image_frame( + mp.ImageFrame(image_format=mp.ImageFormat.SRGBA64, data=uint16_img)) + output_image_frame = mp.packet_getter.get_image_frame(p) + self.assertTrue(np.array_equal(output_image_frame.numpy_view(), uint16_img)) + + def testFloatImageFramePacket(self): + float_img = np.float32( + np.random.random_sample( + (random.randrange(3, 100), random.randrange(3, 100), 2))) + p = mp.packet_creator.create_image_frame( + mp.ImageFrame(image_format=mp.ImageFormat.VEC32F2, data=float_img)) + output_image_frame = mp.packet_getter.get_image_frame(p) + self.assertTrue(np.allclose(output_image_frame.numpy_view(), float_img)) + + def testImageFramePacketCreationCopyMode(self): + w, h, channels = random.randrange(3, 100), random.randrange(3, 100), 3 + rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8) + # rgb_data is c_contiguous. + self.assertTrue(rgb_data.flags.c_contiguous) + initial_ref_count = sys.getrefcount(rgb_data) + p = mp.packet_creator.create_image_frame( + image_format=mp.ImageFormat.SRGB, data=rgb_data) + # copy mode doesn't increase the ref count of the data. + self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count) + + rgb_data = rgb_data[:, :, ::-1] + # rgb_data is now not c_contiguous. But, copy mode shouldn't be affected. + self.assertFalse(rgb_data.flags.c_contiguous) + initial_ref_count = sys.getrefcount(rgb_data) + p = mp.packet_creator.create_image_frame( + image_format=mp.ImageFormat.SRGB, data=rgb_data) + # copy mode doesn't increase the ref count of the data. 
+ self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count) + + output_frame = mp.packet_getter.get_image_frame(p) + self.assertEqual(output_frame.height, h) + self.assertEqual(output_frame.width, w) + self.assertEqual(output_frame.channels, channels) + self.assertTrue(np.array_equal(output_frame.numpy_view(), rgb_data)) + + del p + del output_frame + gc.collect() + # Destroying the packet also doesn't affect the ref count becuase of the + # copy mode. + self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count) + + def testImageFramePacketCopyConstuctionWithCropping(self): + w, h, channels = random.randrange(40, 100), random.randrange(40, 100), 3 + channels, offset = 3, 10 + rgb_data = np.random.randint(255, size=(h, w, channels), dtype=np.uint8) + initial_ref_count = sys.getrefcount(rgb_data) + p = mp.packet_creator.create_image_frame( + image_format=mp.ImageFormat.SRGB, + data=rgb_data[offset:-offset, offset:-offset, :]) + # copy mode doesn't increase the ref count of the data. + self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count) + output_frame = mp.packet_getter.get_image_frame(p) + self.assertEqual(output_frame.height, h - 2 * offset) + self.assertEqual(output_frame.width, w - 2 * offset) + self.assertEqual(output_frame.channels, channels) + self.assertTrue( + np.array_equal(rgb_data[offset:-offset, offset:-offset, :], + output_frame.numpy_view())) + del p + del output_frame + gc.collect() + # Destroying the packet also doesn't affect the ref count becuase of the + # copy mode. + self.assertEqual(sys.getrefcount(rgb_data), initial_ref_count) + + def testMatrixPacket(self): + np_matrix = np.array([[.1, .2, .3], [.4, .5, .6]]) + initial_ref_count = sys.getrefcount(np_matrix) + p = mp.packet_creator.create_matrix(np_matrix) + # Copy mode should not increase the ref count of np_matrix. + self.assertEqual(initial_ref_count, sys.getrefcount(np_matrix)) + output_matrix = mp.packet_getter.get_matrix(p) + del np_matrix + gc.collect() + self.assertTrue( + np.allclose(output_matrix, np.array([[.1, .2, .3], [.4, .5, .6]]))) + + def testMatrixPacketWithNonCContiguousData(self): + np_matrix = np.array([[.1, .2, .3], [.4, .5, .6]])[:, ::-1] + # np_matrix is not c_contiguous. + self.assertFalse(np_matrix.flags.c_contiguous) + p = mp.packet_creator.create_matrix(np_matrix) + initial_ref_count = sys.getrefcount(np_matrix) + # Copy mode should not increase the ref count of np_matrix. + self.assertEqual(initial_ref_count, sys.getrefcount(np_matrix)) + output_matrix = mp.packet_getter.get_matrix(p) + del np_matrix + gc.collect() + self.assertTrue( + np.allclose(output_matrix, + np.array([[.1, .2, .3], [.4, .5, .6]])[:, ::-1])) + +if __name__ == '__main__': + absltest.main() diff --git a/mediapipe/python/pybind/BUILD b/mediapipe/python/pybind/BUILD new file mode 100644 index 000000000..72ee24c72 --- /dev/null +++ b/mediapipe/python/pybind/BUILD @@ -0,0 +1,141 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
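The ref-count checks above pin down the copy semantics of create_image_frame and create_matrix: the packet always owns its own, realigned copy, so the source ndarray is neither retained nor mutated. A minimal sketch of what that guarantees in practice, assuming the same `mp` import used throughout the tests:

import numpy as np
import mediapipe.python as mp

rgb = np.zeros((8, 8, 3), dtype=np.uint8)
packet = mp.packet_creator.create_image_frame(
    image_format=mp.ImageFormat.SRGB, data=rgb)

# Later writes to the source array are not visible through the packet,
# because the pixel data was copied at creation time.
rgb[:] = 255
frame = mp.packet_getter.get_image_frame(packet)
print(int(frame.numpy_view()[0, 0, 0]))  # 0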
+ +load("@pybind11_bazel//:build_defs.bzl", "pybind_library") + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//mediapipe/python:__subpackages__"]) + +pybind_library( + name = "calculator_graph", + srcs = ["calculator_graph.cc"], + hdrs = ["calculator_graph.h"], + deps = [ + ":util", + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/framework:calculator_graph", + "//mediapipe/framework:packet", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:map_util", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "//mediapipe/framework/tool:calculator_graph_template_cc_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +pybind_library( + name = "image_frame", + srcs = ["image_frame.cc"], + hdrs = ["image_frame.h"], + deps = [ + ":image_frame_util", + ":util", + ], +) + +pybind_library( + name = "image_frame_util", + hdrs = ["image_frame_util.h"], + deps = [ + "//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/port:logging", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +pybind_library( + name = "matrix", + srcs = ["matrix.cc"], + hdrs = ["matrix.h"], + deps = [ + "//mediapipe/framework/formats:matrix", + ], +) + +pybind_library( + name = "packet", + srcs = ["packet.cc"], + hdrs = ["packet.h"], + deps = [ + ":util", + "//mediapipe/framework:packet", + "//mediapipe/framework:timestamp", + ], +) + +pybind_library( + name = "packet_creator", + srcs = ["packet_creator.cc"], + hdrs = ["packet_creator.h"], + deps = [ + ":image_frame_util", + ":util", + "//mediapipe/framework:packet", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/port:integral_types", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +pybind_library( + name = "packet_getter", + srcs = ["packet_getter.cc"], + hdrs = ["packet_getter.h"], + deps = [ + ":image_frame_util", + ":util", + "//mediapipe/framework:packet", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/port:integral_types", + ], +) + +pybind_library( + name = "timestamp", + srcs = ["timestamp.cc"], + hdrs = ["timestamp.h"], + deps = [ + ":util", + "//mediapipe/framework:timestamp", + "@com_google_absl//absl/strings", + ], +) + +pybind_library( + name = "resource_util", + srcs = ["resource_util.cc"], + hdrs = ["resource_util.h"], + deps = [ + "//mediapipe/util:resource_util", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + ], +) + +pybind_library( + name = "util", + hdrs = ["util.h"], + deps = [ + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:status", + ], +) diff --git a/mediapipe/python/pybind/calculator_graph.cc b/mediapipe/python/pybind/calculator_graph.cc new file mode 100644 index 000000000..6e0ba552d --- /dev/null +++ b/mediapipe/python/pybind/calculator_graph.cc @@ -0,0 +1,466 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/calculator_graph.h" + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/map_util.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/tool/calculator_graph_template.pb.h" +#include "mediapipe/python/pybind/util.h" +#include "pybind11/embed.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace mediapipe { +namespace python { + +template +T ParseProto(const py::object& proto_object) { + T proto; + if (!ParseTextProto(proto_object.str(), &proto)) { + throw RaisePyError( + PyExc_RuntimeError, + absl::StrCat("Failed to parse: ", std::string(proto_object.str())) + .c_str()); + } + return proto; +} + +namespace py = pybind11; + +void CalculatorGraphSubmodule(pybind11::module* module) { + py::module m = module->def_submodule("calculator_graph", + "MediaPipe calculator graph module."); + + using GraphInputStreamAddMode = + mediapipe::CalculatorGraph::GraphInputStreamAddMode; + + py::enum_(m, "GraphInputStreamAddMode") + .value("WAIT_TILL_NOT_FULL", GraphInputStreamAddMode::WAIT_TILL_NOT_FULL) + .value("ADD_IF_NOT_FULL", GraphInputStreamAddMode::ADD_IF_NOT_FULL) + .export_values(); + + // Calculator Graph + py::class_ calculator_graph( + m, "CalculatorGraph", R"doc(The primary API for the MediaPipe Framework. + + MediaPipe processing takes place inside a graph, which defines packet flow + paths between nodes. A graph can have any number of inputs and outputs, and + data flow can branch and merge. Generally data flows forward, but backward + loops are possible.)doc"); + + // TODO: Support graph initialization with graph templates and + // subgraph. 
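+ // Illustrative Python-side construction (see the init docstring below); + // exactly one of the two keyword arguments must be supplied: + //   graph = mp.CalculatorGraph(graph_config=config_text_or_proto) + //   graph = mp.CalculatorGraph(binary_graph_path='/path/to/graph.binarypb') + // The path and config names above are placeholders for illustration only.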
+ calculator_graph.def( + py::init([](py::args args, py::kwargs kwargs) { + if (!args.empty()) { + throw RaisePyError(PyExc_RuntimeError, + "Invalid positional input arguments."); + } + bool init_with_binary_graph = false; + bool init_with_graph_proto = false; + CalculatorGraphConfig graph_config_proto; + for (const auto& kw : kwargs) { + const std::string& key = kw.first.cast<std::string>(); + if (key == "binary_graph_path") { + init_with_binary_graph = true; + std::string file_name(kw.second.cast<py::object>().str()); + auto status = file::Exists(file_name); + if (!status.ok()) { + throw RaisePyError(PyExc_FileNotFoundError, + status.message().data()); + } + std::string graph_config_string; + RaisePyErrorIfNotOk( + file::GetContents(file_name, &graph_config_string)); + if (!graph_config_proto.ParseFromArray( + graph_config_string.c_str(), + graph_config_string.length())) { + throw RaisePyError( + PyExc_RuntimeError, + absl::StrCat("Failed to parse the binary graph: ", file_name) + .c_str()); + } + } else if (key == "graph_config") { + init_with_graph_proto = true; + graph_config_proto = + ParseProto<CalculatorGraphConfig>(kw.second.cast<py::object>()); + } else { + throw RaisePyError( + PyExc_RuntimeError, + absl::StrCat("Unknown kwargs input argument: ", key).c_str()); + } + } + + if (!(init_with_binary_graph ^ init_with_graph_proto)) { + throw RaisePyError( + PyExc_ValueError, + "Please provide \'binary_graph_path\' to initialize the graph " + "with a binary graph or provide \'graph_config\' to initialize " + "the graph with a graph config proto."); + } + auto calculator_graph = absl::make_unique<CalculatorGraph>(); + RaisePyErrorIfNotOk(calculator_graph->Initialize(graph_config_proto)); + return calculator_graph.release(); + }), + R"doc(Initialize CalculatorGraph object. + + Args: + binary_graph_path: The path to a binary mediapipe graph file (.binarypb). + graph_config: A single CalculatorGraphConfig proto message or its text proto + format. + + Raises: + FileNotFoundError: If the binary graph file can't be found. + ValueError: If the input arguments provided are more than needed or the + graph validation process contains errors. +)doc"); + + // TODO: Return a Python CalculatorGraphConfig instead. + calculator_graph.def_property_readonly( + "config", + [](const CalculatorGraph& self) { return self.Config().DebugString(); }); + + calculator_graph.def_property_readonly( + "serialized_config", [](const CalculatorGraph& self) { + return py::bytes(self.Config().SerializeAsString()); + }); + + calculator_graph.def_property_readonly( + "max_queue_size", + [](CalculatorGraph* self) { return self->GetMaxInputStreamQueueSize(); }); + + calculator_graph.def_property( + "graph_input_stream_add_mode", + [](const CalculatorGraph& self) { + return self.GetGraphInputStreamAddMode(); + }, + [](CalculatorGraph* self, CalculatorGraph::GraphInputStreamAddMode mode) { + self->SetGraphInputStreamAddMode(mode); + }); + + calculator_graph.def( + "add_packet_to_input_stream", + [](CalculatorGraph* self, const std::string& stream, const Packet& packet, + const Timestamp& timestamp) { + Timestamp packet_timestamp = + timestamp == Timestamp::Unset() ? packet.Timestamp() : timestamp; + if (!packet_timestamp.IsAllowedInStream()) { + throw RaisePyError( + PyExc_ValueError, + absl::StrCat(packet_timestamp.DebugString(), + " can't be the timestamp of a Packet in a stream.") + .c_str()); + } + RaisePyErrorIfNotOk( + self->AddPacketToInputStream(stream, packet.At(packet_timestamp))); + }, + R"doc(Add a packet to a graph input stream.
+ + If the graph input stream add mode is ADD_IF_NOT_FULL, the packet will not be + added if any queue exceeds the max queue size specified by the graph config, + and a Python runtime error will be raised. The WAIT_TILL_NOT_FULL mode (default) + will block until the queues fall below the max queue size before adding the + packet. If the max queue size is -1, then the packet is added + regardless of the sizes of the queues in the graph. The input stream must have + been specified in the configuration as a graph level input stream. On error, + nothing is added. + + Args: + stream: The name of the graph input stream. + packet: The packet to be added into the input stream. + timestamp: The timestamp of the packet. If set, the original packet + timestamp will be overwritten. + + Raises: + RuntimeError: If the stream is not a graph input stream or the packet can't + be added into the input stream due to the limited queue size or the wrong + packet type. + ValueError: If the timestamp of the Packet is invalid to be the timestamp of + a Packet in a stream. + + Examples: + graph.add_packet_to_input_stream( + stream='in', + packet=packet_creator.create_string('hello world').at(0)) + + graph.add_packet_to_input_stream( + stream='in', + packet=packet_creator.create_string('hello world'), + timestamp=1) +)doc", + py::arg("stream"), py::arg("packet"), + py::arg("timestamp") = Timestamp::Unset()); + + calculator_graph.def( + "close_input_stream", + [](CalculatorGraph* self, const std::string& stream) { + RaisePyErrorIfNotOk(self->CloseInputStream(stream)); + }, + R"doc(Close the named graph input stream. + + Args: + stream: The name of the stream to be closed. + + Raises: + RuntimeError: If the stream is not a graph input stream. + +)doc"); + + calculator_graph.def( + "close_all_packet_sources", + [](CalculatorGraph* self) { + RaisePyErrorIfNotOk(self->CloseAllPacketSources()); + }, + R"doc(Closes all the graph input streams and source calculator nodes.)doc"); + + calculator_graph.def( + "start_run", + [](CalculatorGraph* self, const pybind11::dict& input_side_packets) { + std::map<std::string, Packet> input_side_packet_map; + for (const auto& kv_pair : input_side_packets) { + InsertIfNotPresent(&input_side_packet_map, + kv_pair.first.cast<std::string>(), + kv_pair.second.cast<Packet>()); + } + RaisePyErrorIfNotOk(self->StartRun(input_side_packet_map)); + }, + + R"doc(Start a run of the calculator graph. + + A non-blocking call that starts a run of the graph and returns when the graph + is started. If input_side_packets is provided, the method will run the graph + after adding the given extra input side packets. + + start_run(), wait_until_done(), has_error(), add_packet_to_input_stream(), and + close() allow more control over the execution of the graph run. You can + insert packets directly into a stream while the graph is running. + Once start_run() has been called, the graph will continue to run until + wait_until_done() is called. + + If start_run() returns an error, then the graph is not started and a + subsequent call to start_run() can be attempted. + + Args: + input_side_packets: A dict mapping input side packet names to packets. + + Raises: + RuntimeError: If starting the run encounters an error, e.g. the graph config + has errors, a calculator can't be found, or the streams are not properly + connected.
+ + Examples: + graph = mp.CalculatorGraph(graph_config=video_process_graph) + graph.start_run( + input_side_packets={ + 'input_path': packet_creator.create_string('/tmp/input.video'), + 'output_path': packet_creator.create_string('/tmp/output.video') + }) + graph.close() + + out = [] + graph = mp.CalculatorGraph(graph_config=pass_through_graph) + graph.observe_output_stream('out', + lambda stream_name, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=packet_creator.create_int(0), timestamp=0) + graph.add_packet_to_input_stream( + stream='in', packet=packet_creator.create_int(1), timestamp=1) + graph.close() + +)doc", + py::arg("input_side_packets") = (py::dict){}); + + calculator_graph.def( + "wait_until_done", + [](CalculatorGraph* self) { RaisePyErrorIfNotOk(self->WaitUntilDone()); }, + R"doc(Wait for the current run to finish. + + A blocking call to wait for the current run to finish (block the current + thread until all source calculators are stopped, all graph input streams have + been closed, and no more calculators can be run). This function can be called + only after start_run(), + + Raises: + RuntimeError: If the graph occurs any error during the wait call. + + Examples: + out = [] + graph = mp.CalculatorGraph(graph_config=pass_through_graph) + graph.observe_output_stream('out', lambda stream_name, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=packet_creator.create_int(0), timestamp=0) + graph.close_all_packet_sources() + graph.wait_until_done() + +)doc"); + + calculator_graph.def( + "wait_until_idle", + [](CalculatorGraph* self) { RaisePyErrorIfNotOk(self->WaitUntilIdle()); }, + R"doc(Wait until the running graph is in the idle mode. + + Wait until the running graph is in the idle mode, which is when nothing can + be scheduled and nothing is running in the worker threads. This function can + be called only after start_run(). + + NOTE: The graph must not have any source nodes because source nodes prevent + the running graph from becoming idle until the source nodes are done. + + Raises: + RuntimeError: If the graph occurs any error during the wait call. + + Examples: + out = [] + graph = mp.CalculatorGraph(graph_config=pass_through_graph) + graph.observe_output_stream('out', + lambda stream_name, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=packet_creator.create_int(0), timestamp=0) + graph.wait_until_idle() + +)doc"); + + calculator_graph.def( + "wait_for_observed_output", + [](CalculatorGraph* self) { + RaisePyErrorIfNotOk(self->WaitForObservedOutput()); + }, + R"doc(Wait until a packet is emitted on one of the observed output streams. + + Returns immediately if a packet has already been emitted since the last + call to this function. + + Raises: + RuntimeError: + If the graph occurs any error or the graph is terminated while waiting. 
+ + Examples: + out = [] + graph = mp.CalculatorGraph(graph_config=pass_through_graph) + graph.observe_output_stream('out', + lambda stream_name, packet: out.append(packet)) + graph.start_run() + graph.add_packet_to_input_stream( + stream='in', packet=packet_creator.create_int(0), timestamp=0) + graph.wait_for_observed_output() + value = packet_getter.get_int(out[0]) + graph.add_packet_to_input_stream( + stream='in', packet=packet_creator.create_int(1), timestamp=1) + graph.wait_for_observed_output() + value = packet_getter.get_int(out[1]) + +)doc"); + + calculator_graph.def( + "has_error", [](const CalculatorGraph& self) { return self.HasError(); }, + R"doc(Quick non-locking means of checking if the graph has encountered an error)doc"); + + calculator_graph.def( + "get_combined_error_message", + [](CalculatorGraph* self) { + ::mediapipe::Status error_status; + if (self->GetCombinedErrors(&error_status) && !error_status.ok()) { + return error_status.ToString(); + } + return std::string(); + }, + R"doc(Combines error messages as a single std::string. + + Examples: + if graph.has_error(): + print(graph.get_combined_error_message()) + +)doc"); + + // TODO: Support passing a single-argument lambda for convenience. + calculator_graph.def( + "observe_output_stream", + [](CalculatorGraph* self, const std::string& stream_name, + pybind11::function callback_fn) { + RaisePyErrorIfNotOk(self->ObserveOutputStream( + stream_name, [callback_fn, stream_name](const Packet& packet) { + callback_fn(stream_name, packet); + return mediapipe::OkStatus(); + })); + }, + R"doc(Observe the named output stream. + + callback_fn will be invoked on every packet emitted by the output stream. + This method can only be called before start_run(). + + Args: + stream_name: The name of the output stream. + callback_fn: The callback function to invoke on every packet emitted by the + output stream. + + Raises: + RuntimeError: If the calculator graph isn't initialized or the stream + doesn't exist. + + Examples: + out = [] + graph = mp.CalculatorGraph(graph_config=graph_config) + graph.observe_output_stream('out', + lambda stream_name, packet: out.append(packet)) + +)doc"); + + calculator_graph.def( + "close", + [](CalculatorGraph* self) { + RaisePyErrorIfNotOk(self->CloseAllPacketSources()); + RaisePyErrorIfNotOk(self->WaitUntilDone()); + }, + R"doc(Close all the input sources and shutdown the graph.)doc"); + + calculator_graph.def( + "get_output_side_packet", + [](CalculatorGraph* self, const std::string& packet_name) { + auto status_or_packet = self->GetOutputSidePacket(packet_name); + RaisePyErrorIfNotOk(status_or_packet.status()); + return status_or_packet.ValueOrDie(); + }, + R"doc(Get output side packet by name after the graph is done. + + Args: + stream: The name of the outnput stream. + + Raises: + RuntimeError: If the graph is still running or the output side packet is not + found or empty. + + Examples: + graph = mp.CalculatorGraph(graph_config=graph_config) + graph.start_run() + graph.close() + output_side_packet = graph.get_output_side_packet('packet_name') + +)doc", + py::return_value_policy::move); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/calculator_graph.h b/mediapipe/python/pybind/calculator_graph.h new file mode 100644 index 000000000..21108acab --- /dev/null +++ b/mediapipe/python/pybind/calculator_graph.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_CALCULATOR_GRAPH_H_ +#define MEDIAPIPE_PYTHON_PYBIND_CALCULATOR_GRAPH_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void CalculatorGraphSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_CALCULATOR_GRAPH_H_ diff --git a/mediapipe/python/pybind/image_frame.cc b/mediapipe/python/pybind/image_frame.cc new file mode 100644 index 000000000..cd48ade5c --- /dev/null +++ b/mediapipe/python/pybind/image_frame.cc @@ -0,0 +1,326 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/image_frame_util.h" +#include "mediapipe/python/pybind/util.h" +#include "pybind11/stl.h" + +namespace mediapipe { +namespace python { +namespace { + +template +py::array GenerateContiguousDataArray(const ImageFrame& image_frame, + const py::object& py_object) { + std::vector shape{image_frame.Height(), image_frame.Width()}; + if (image_frame.NumberOfChannels() > 1) { + shape.push_back(image_frame.NumberOfChannels()); + } + py::array_t contiguous_data; + if (image_frame.IsContiguous()) { + // TODO: Create contiguous_data without copying ata. + // It's possible to achieve this with the help of py::capsule. + // Reference: https://github.com/pybind/pybind11/issues/1042, + contiguous_data = py::array_t( + shape, reinterpret_cast(image_frame.PixelData())); + } else { + auto contiguous_data_copy = + absl::make_unique(image_frame.Width() * image_frame.Height() * + image_frame.NumberOfChannels()); + image_frame.CopyToBuffer(contiguous_data_copy.get(), + image_frame.PixelDataSizeStoredContiguously()); + auto capsule = py::capsule(contiguous_data_copy.get(), [](void* data) { + if (data) { + delete[] reinterpret_cast(data); + } + }); + contiguous_data = py::array_t( + shape, contiguous_data_copy.release(), capsule); + } + + // In both cases, the underlying data is not writable in Python. 
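+ // Clearing NumPy's WRITEABLE flag below makes the returned array read-only, + // so Python callers cannot mutate the ImageFrame's pixels through this view; + // a mutable copy can still be obtained with np.copy().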
+ py::detail::array_proxy(contiguous_data.ptr())->flags &= + ~py::detail::npy_api::NPY_ARRAY_WRITEABLE_; + return contiguous_data; +} + +py::array GetContiguousDataAttr(const ImageFrame& image_frame, + const py::object& py_object) { + py::object get_data_attr = + py::getattr(py_object, "__contiguous_data", py::none()); + if (image_frame.IsEmpty()) { + throw RaisePyError(PyExc_RuntimeError, "ImageFrame is unallocated."); + } + // If __contiguous_data attr already stores data, return the cached results. + if (!get_data_attr.is_none()) { + return get_data_attr.cast(); + } + switch (image_frame.ChannelSize()) { + case sizeof(uint8): + py_object.attr("__contiguous_data") = + GenerateContiguousDataArray(image_frame, py_object); + break; + case sizeof(uint16): + py_object.attr("__contiguous_data") = + GenerateContiguousDataArray(image_frame, py_object); + break; + case sizeof(float): + py_object.attr("__contiguous_data") = + GenerateContiguousDataArray(image_frame, py_object); + break; + default: + throw RaisePyError(PyExc_RuntimeError, + "Unsupported image frame channel size. Data is not " + "uint8, uint16, or float?"); + } + return py_object.attr("__contiguous_data").cast(); +} + +template +py::object GetValue(const ImageFrame& image_frame, const std::vector& pos, + const py::object& py_object) { + py::array_t output_array = + GetContiguousDataAttr(image_frame, py_object); + if (pos.size() == 2) { + return py::cast(static_cast(output_array.at(pos[0], pos[1]))); + } else if (pos.size() == 3) { + return py::cast(static_cast(output_array.at(pos[0], pos[1], pos[2]))); + } + return py::none(); +} + +} // namespace + +namespace py = pybind11; + +void ImageFrameSubmodule(pybind11::module* module) { + py::module m = + module->def_submodule("image_frame", "MediaPipe image frame module"); + + py::options options; + options.disable_function_signatures(); + + // ImageFormat + py::enum_ image_format( + m, "ImageFormat", + R"doc(An enum describing supported raw image formats. + + SRGB: sRGB, interleaved: one byte for R, then one byte for G, then one byte + for B for each pixel. + + SRGBA: sRGBA, interleaved: one byte for R, one byte for G, one byte for B, one + byte for alpha or unused. + + SBGRA: sBGRA, interleaved: one byte for B, one byte for G, one byte for R, one + byte for alpha or unused. + + GRAY8: Grayscale, one byte per pixel. + + GRAY16: Grayscale, one uint16 per pixel. + + SRGB48: sRGB, interleaved, each component is a uint16. + + SRGBA64: sRGBA, interleaved, each component is a uint16. + + VEC32F1: One float per pixel. + + VEC32F2: Two floats per pixel. +)doc"); + + image_format.value("SRGB", mediapipe::ImageFormat::SRGB) + .value("SRGBA", mediapipe::ImageFormat::SRGBA) + .value("SBGRA", mediapipe::ImageFormat::SBGRA) + .value("GRAY8", mediapipe::ImageFormat::GRAY8) + .value("GRAY16", mediapipe::ImageFormat::GRAY16) + .value("SRGB48", mediapipe::ImageFormat::SRGB48) + .value("SRGBA64", mediapipe::ImageFormat::SRGBA64) + .value("VEC32F1", mediapipe::ImageFormat::VEC32F1) + .value("VEC32F2", mediapipe::ImageFormat::VEC32F2) + .export_values(); + + // ImageFrame + py::class_ image_frame( + m, "ImageFrame", + R"doc(A container for storing an image or a video frame, in one of several formats. + + Formats supported by ImageFrame are listed in the ImageFormat enum. + Pixels are encoded row-major in an interleaved fashion. ImageFrame supports + uint8, uint16, and float as its data types. + + ImageFrame can be created by copying the data from a numpy ndarray that stores + the pixel data continuously. 
An ImageFrame may realign the input data on its + default alignment boundary during creation. The data in an ImageFrame will + become immutable after creation. + + Creation examples: + import cv2 + cv_mat = cv2.imread(input_file)[:, :, ::-1] + rgb_frame = mp.ImageFrame(format=ImageFormat.SRGB, data=cv_mat) + gray_frame = mp.ImageFrame( + format=ImageFormat.GRAY, data=cv2.cvtColor(cv_mat, cv2.COLOR_RGB2GRAY)) + + from PIL import Image + pil_img = Image.new('RGB', (60, 30), color = 'red') + image_frame = mp.ImageFrame( + format=mp.ImageFormat.SRGB, data=np.asarray(pil_img)) + + The pixel data in an ImageFrame can be retrieved as a numpy ndarray by calling + `ImageFrame.numpy_view()`. The returned numpy ndarray is a reference to the + internal data and itself is unwritable. If the callers want to modify the + numpy ndarray, it's required to obtain a copy of it. + + Pixel data retrieval examples: + for channel in range(num_channel): + for col in range(width): + for row in range(height): + print(image_frame[row, col, channel]) + + output_ndarray = image_frame.numpy_view() + print(output_ndarray[0, 0, 0]) + copied_ndarray = np.copy(output_ndarray) + copied_ndarray[0,0,0] = 0 + )doc", + py::dynamic_attr()); + + image_frame + .def( + py::init([](mediapipe::ImageFormat::Format format, + const py::array_t& data) { + if (format != mediapipe::ImageFormat::GRAY8 && + format != mediapipe::ImageFormat::SRGB && + format != mediapipe::ImageFormat::SRGBA) { + throw RaisePyError(PyExc_RuntimeError, + "uint8 image data should be one of the GRAY8, " + "SRGB, and SRGBA MediaPipe image formats."); + } + return CreateImageFrame(format, data); + }), + R"doc(For uint8 data type, valid ImageFormat are GRAY8, SGRB, and SRGBA.)doc", + py::arg("image_format"), py::arg("data").noconvert()) + .def( + py::init([](mediapipe::ImageFormat::Format format, + const py::array_t& data) { + if (format != mediapipe::ImageFormat::GRAY16 && + format != mediapipe::ImageFormat::SRGB48 && + format != mediapipe::ImageFormat::SRGBA64) { + throw RaisePyError( + PyExc_RuntimeError, + "uint16 image data should be one of the GRAY16, " + "SRGB48, and SRGBA64 MediaPipe image formats."); + } + return CreateImageFrame(format, data); + }), + R"doc(For uint16 data type, valid ImageFormat are GRAY16, SRGB48, and SRGBA64.)doc", + py::arg("image_format"), py::arg("data").noconvert()) + .def( + py::init([](mediapipe::ImageFormat::Format format, + const py::array_t& data) { + if (format != mediapipe::ImageFormat::VEC32F1 && + format != mediapipe::ImageFormat::VEC32F2) { + throw RaisePyError( + PyExc_RuntimeError, + "float image data should be either VEC32F1 or VEC32F2 " + "MediaPipe image formats."); + } + return CreateImageFrame(format, data); + }), + R"doc(For float data type, valid ImageFormat are VEC32F1 and VEC32F2.)doc", + py::arg("image_format"), py::arg("data").noconvert()); + + image_frame.def( + "numpy_view", + [](ImageFrame& self) { + py::object py_object = + py::cast(self, py::return_value_policy::reference); + return GetContiguousDataAttr(self, py_object); + }, + R"doc(Return the image frame pixel data as an unwritable numpy ndarray. + + Realign the pixel data to be stored contiguously and return a reference to the + unwritable numpy ndarray. If the callers want to modify the numpy array data, + it's required to obtain a copy of the ndarray. + + Returns: + An unwritable numpy ndarray. 
+ + Examples: + output_ndarray = image_frame.numpy_view() + copied_ndarray = np.copy(output_ndarray) + copied_ndarray[0,0,0] = 0 +)doc"); + + image_frame.def( + "__getitem__", + [](ImageFrame& self, const std::vector& pos) { + if (pos.size() != 3 && + !(pos.size() == 2 && self.NumberOfChannels() == 1)) { + throw RaisePyError( + PyExc_IndexError, + absl::StrCat("Invalid index dimension: ", pos.size()).c_str()); + } + py::object py_object = + py::cast(self, py::return_value_policy::reference); + switch (self.ByteDepth()) { + case 1: + return GetValue(self, pos, py_object); + case 2: + return GetValue(self, pos, py_object); + case 4: + return GetValue(self, pos, py_object); + default: + return py::object(); + } + }, + R"doc(Use the indexer operators to access pixel data. + + Raises: + IndexError: If the index is invalid or out of bounds. + + Examples: + for channel in range(num_channel): + for col in range(width): + for row in range(height): + print(image_frame[row, col, channel]) + +)doc"); + + image_frame + .def( + "is_contiguous", &ImageFrame::IsContiguous, + R"doc(Return True if the pixel data is stored contiguously (without any alignment padding areas).)doc") + .def("is_empty", &ImageFrame::IsEmpty, + R"doc(Return True if the pixel data is unallocated.)doc") + .def( + "is_aligned", &ImageFrame::IsAligned, + R"doc(Return True if each row of the data is aligned to alignment boundary, which must be 1 or a power of 2. + + Args: + alignment_boundary: An integer. + + Returns: + A boolean. + + Examples: + image_frame.is_aligned(16) +)doc"); + + image_frame.def_property_readonly("width", &ImageFrame::Width) + .def_property_readonly("height", &ImageFrame::Height) + .def_property_readonly("channels", &ImageFrame::NumberOfChannels) + .def_property_readonly("byte_depth", &ImageFrame::ByteDepth) + .def_property_readonly("image_format", &ImageFrame::Format); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/image_frame.h b/mediapipe/python/pybind/image_frame.h new file mode 100644 index 000000000..c9d8efa84 --- /dev/null +++ b/mediapipe/python/pybind/image_frame.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_IMAGE_FRAME_H_ +#define MEDIAPIPE_PYTHON_PYBIND_IMAGE_FRAME_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void ImageFrameSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_IMAGE_FRAME_H_ diff --git a/mediapipe/python/pybind/image_frame_util.h b/mediapipe/python/pybind/image_frame_util.h new file mode 100644 index 000000000..189b4e93b --- /dev/null +++ b/mediapipe/python/pybind/image_frame_util.h @@ -0,0 +1,61 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_IMAGE_FRAME_UTIL_H_ +#define MEDIAPIPE_PYTHON_PYBIND_IMAGE_FRAME_UTIL_H_ + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/port/logging.h" +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +namespace py = pybind11; + +// TODO: Implement the reference mode of image frame creation, which +// takes a reference to the external data rather than copying it over. +// A possible solution is to have a custom PixelDataDeleter: +// The refcount of the numpy array will be increased when the image frame is +// created by taking a reference to the external numpy array data. Then, the +// custom PixelDataDeleter will decrease the refcount when the image frame gets +// destroyed and let Python GC does its job. +template +std::unique_ptr CreateImageFrame( + mediapipe::ImageFormat::Format format, + const py::array_t& data) { + int rows = data.shape()[0]; + int cols = data.shape()[1]; + int width_step = ImageFrame::NumberOfChannelsForFormat(format) * + ImageFrame::ByteDepthForFormat(format) * cols; + auto image_frame = absl::make_unique( + format, /*width=*/cols, /*height=*/rows, width_step, + static_cast(data.request().ptr), + ImageFrame::PixelDataDeleter::kNone); + auto image_frame_copy = absl::make_unique(); + // Set alignment_boundary to kGlDefaultAlignmentBoundary so that both + // GPU and CPU can process it. + image_frame_copy->CopyFrom(*image_frame, + ImageFrame::kGlDefaultAlignmentBoundary); + return image_frame_copy; +} + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_IMAGE_FRAME_UTIL_H_ diff --git a/mediapipe/python/pybind/matrix.cc b/mediapipe/python/pybind/matrix.cc new file mode 100644 index 000000000..2a3958f01 --- /dev/null +++ b/mediapipe/python/pybind/matrix.cc @@ -0,0 +1,36 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/matrix.h" + +#include "mediapipe/framework/formats/matrix.h" +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +namespace py = pybind11; + +void MatrixSubmodule(pybind11::module* module) { + py::module m = module->def_submodule("matrix", "MediaPipe matrix module."); + + py::class_(m, "Matrix") + .def(py::init( + // Pass by reference. 
+ [](const Eigen::Ref& m) { return m; })); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/matrix.h b/mediapipe/python/pybind/matrix.h new file mode 100644 index 000000000..cbd19659b --- /dev/null +++ b/mediapipe/python/pybind/matrix.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_MATRIX_H_ +#define MEDIAPIPE_PYTHON_PYBIND_MATRIX_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void MatrixSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_MATRIX_H_ diff --git a/mediapipe/python/pybind/packet.cc b/mediapipe/python/pybind/packet.cc new file mode 100644 index 000000000..29054a4b0 --- /dev/null +++ b/mediapipe/python/pybind/packet.cc @@ -0,0 +1,72 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/packet.h" + +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/timestamp.h" +#include "mediapipe/python/pybind/util.h" +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +namespace py = pybind11; + +void PacketSubmodule(pybind11::module* module) { + py::module m = module->def_submodule("packet", "MediaPipe packet module."); + + py::class_ packet( + m, "Packet", + R"doc(The basic data flow unit of MediaPipe. A generic container class which can hold data of any type. + + A packet consists of a numeric timestamp and a shared pointer to an immutable + payload. The payload can be of any C++ type (See packet_creator module for + the list of the Python types that are supported). The payload's type is also + referred to as the type of the packet. Packets are value classes and can be + copied and moved cheaply. Each copy shares ownership of the payload, with be + copied reference-counting semantics. Each copy has its own timestamp. + + The preferred method of creating a Packet is to invoke the methods in the + "packet_creator" module. Packet contents can be retrieved by the methods in + the "packet_getter" module. +)doc"); + + packet.def(py::init(), + R"doc(Create an empty Packet, for which is_empty() is True and + timestamp() is Timestamp.unset. 
Calling packet getter methods on this Packet leads to runtime error.)doc"); + + packet.def( + "is_empty", &Packet::IsEmpty, + R"doc(Return true iff the Packet has been created using the default constructor Packet(), or is a copy of such a Packet.)doc"); + + packet.def(py::init<const Packet&>()) + .def("at", [](Packet* self, + int64 ts_value) { return self->At(Timestamp(ts_value)); }) + .def("at", [](Packet* self, Timestamp ts) { return self->At(ts); }) + .def_property( + "timestamp", &Packet::Timestamp, + [](Packet* p, int64 ts_value) { *p = p->At(Timestamp(ts_value)); }) + .def("__repr__", [](const Packet& self) { + return absl::StrCat( + "<mediapipe.Packet with timestamp: ", + self.Timestamp().DebugString(), + self.IsEmpty() + ? ">" + : absl::StrCat(" and C++ type: ", self.DebugTypeName(), ">")); + }); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/packet.h b/mediapipe/python/pybind/packet.h new file mode 100644 index 000000000..d3a0cee08 --- /dev/null +++ b/mediapipe/python/pybind/packet.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_PACKET_H_ +#define MEDIAPIPE_PYTHON_PYBIND_PACKET_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void PacketSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif  // MEDIAPIPE_PYTHON_PYBIND_PACKET_H_ diff --git a/mediapipe/python/pybind/packet_creator.cc b/mediapipe/python/pybind/packet_creator.cc new file mode 100644 index 000000000..90959b3dd --- /dev/null +++ b/mediapipe/python/pybind/packet_creator.cc @@ -0,0 +1,645 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
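The timestamp plumbing bound above is easiest to see from Python. A small sketch mirroring the string-packet test earlier in this change (assumes the usual `mp` import):

import mediapipe.python as mp

p = mp.packet_creator.create_string('hello').at(100)  # at() returns a re-stamped copy
assert p.timestamp == 100
p.timestamp = 200  # the property setter re-stamps the packet in place
assert mp.packet_getter.get_str(p) == 'hello' and p.timestamp == 200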
+ +#include "mediapipe/python/pybind/packet_creator.h" + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/timestamp.h" +#include "mediapipe/python/pybind/image_frame_util.h" +#include "mediapipe/python/pybind/util.h" +#include "pybind11/eigen.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace mediapipe { +namespace python { +namespace { + +Packet CreateImageFramePacket(mediapipe::ImageFormat::Format format, + const py::array& data) { + if (format == mediapipe::ImageFormat::SRGB || + format == mediapipe::ImageFormat::SRGBA || + format == mediapipe::ImageFormat::GRAY8) { + return Adopt(CreateImageFrame(format, data).release()); + } else if (format == mediapipe::ImageFormat::GRAY16 || + format == mediapipe::ImageFormat::SRGB48 || + format == mediapipe::ImageFormat::SRGBA64) { + return Adopt(CreateImageFrame(format, data).release()); + } else if (format == mediapipe::ImageFormat::VEC32F1 || + format == mediapipe::ImageFormat::VEC32F2) { + return Adopt(CreateImageFrame(format, data).release()); + } + throw RaisePyError(PyExc_RuntimeError, + absl::StrCat("Unsupported ImageFormat: ", format).c_str()); + return Packet(); +} + +} // namespace + +namespace py = pybind11; + +void PublicPacketCreators(pybind11::module* m) { + m->def( + "create_string", + [](const std::string& data) { return MakePacket(data); }, + R"doc(Create a MediaPipe std::string Packet from a str. + + Args: + data: A str. + + Returns: + A MediaPipe std::string Packet. + + Raises: + TypeError: If the input is not a str. + + Examples: + packet = mp.packet_creator.create_string('abc') + data = mp.packet_getter.get_string(packet) +)doc", + py::return_value_policy::move); + + m->def( + "create_string", + [](const py::bytes& data) { return MakePacket(data); }, + R"doc(Create a MediaPipe std::string Packet from a bytes object. + + Args: + data: A bytes object. + + Returns: + A MediaPipe std::string Packet. + + Raises: + TypeError: If the input is not a bytes object. + + Examples: + packet = mp.packet_creator.create_string(b'\xd0\xd0\xd0') + data = mp.packet_getter.get_bytes(packet) +)doc", + py::return_value_policy::move); + + m->def( + "create_bool", [](bool data) { return MakePacket(data); }, + R"doc(Create a MediaPipe bool Packet from a boolean object. + + Args: + data: A boolean object. + + Returns: + A MediaPipe bool Packet. + + Raises: + TypeError: If the input is not a boolean object. + + Examples: + packet = mp.packet_creator.create_bool(True) + data = mp.packet_getter.get_bool(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int", + [](int64 data) { + RaisePyErrorIfOverflow(data, INT_MIN, INT_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe int Packet from an integer. + + Args: + data: An integer or a np.intc. + + Returns: + A MediaPipe int Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is not an integer. + + Examples: + packet = mp.packet_creator.create_int(0) + data = mp.packet_getter.get_int(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int8", + [](int64 data) { + RaisePyErrorIfOverflow(data, INT8_MIN, INT8_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe int8 Packet from an integer. + + Args: + data: An integer or a np.int8. 
+ + Returns: + A MediaPipe int8 Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is neither an integer nor a np.int8. + + Examples: + packet = mp.packet_creator.create_int8(2**7 - 1) + data = mp.packet_getter.get_int(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int16", + [](int64 data) { + RaisePyErrorIfOverflow(data, INT16_MIN, INT16_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe int16 Packet from an integer. + + Args: + data: An integer or a np.int16. + + Returns: + A MediaPipe int16 Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is neither an integer nor a np.int16. + + Examples: + packet = mp.packet_creator.create_int16(2**15 - 1) + data = mp.packet_getter.get_int(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int32", + [](int64 data) { + RaisePyErrorIfOverflow(data, INT32_MIN, INT32_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe int32 Packet from an integer. + + Args: + data: An integer or a np.int32. + + Returns: + A MediaPipe int32 Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is neither an integer nor a np.int32. + + Examples: + packet = mp.packet_creator.create_int32(2**31 - 1) + data = mp.packet_getter.get_int(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int64", [](int64 data) { return MakePacket(data); }, + R"doc(Create a MediaPipe int64 Packet from an integer. + + Args: + data: An integer or a np.int64. + + Returns: + A MediaPipe int64 Packet. + + Raises: + TypeError: If the input is neither an integer nor a np.int64. + + Examples: + packet = mp.packet_creator.create_int64(2**63 - 1) + data = mp.packet_getter.get_int(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_uint8", + [](int64 data) { + RaisePyErrorIfOverflow(data, 0, UINT8_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe uint8 Packet from an integer. + + Args: + data: An integer or a np.uint8. + + Returns: + A MediaPipe uint8 Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is neither an integer nor a np.uint8. + + Examples: + packet = mp.packet_creator.create_uint8(2**8 - 1) + data = mp.packet_getter.get_uint(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_uint16", + [](int64 data) { + RaisePyErrorIfOverflow(data, 0, UINT16_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe uint16 Packet from an integer. + + Args: + data: An integer or a np.uint16. + + Returns: + A MediaPipe uint16 Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is neither an integer nor a np.uint16. + + Examples: + packet = mp.packet_creator.create_uint16(2**16 - 1) + data = mp.packet_getter.get_uint(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_uint32", + [](int64 data) { + RaisePyErrorIfOverflow(data, 0, UINT32_MAX); + return MakePacket(data); + }, + R"doc(Create a MediaPipe uint32 Packet from an integer. + + Args: + data: An integer or a np.uint32. + + Returns: + A MediaPipe uint32 Packet. + + Raises: + OverflowError: If the input integer overflows. + TypeError: If the input is neither an integer nor a np.uint32. 
+ + Examples: + packet = mp.packet_creator.create_uint32(2**32 - 1) + data = mp.packet_getter.get_uint(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_uint64", [](uint64 data) { return MakePacket(data); }, + R"doc(Create a MediaPipe uint64 Packet from an integer. + + Args: + data: An integer or a np.uint64. + + Returns: + A MediaPipe uint64 Packet. + + Raises: + TypeError: If the input is neither an integer nor a np.uint64. + + Examples: + packet = mp.packet_creator.create_uint64(2**64 - 1) + data = mp.packet_getter.get_uint(packet) +)doc", + // py::arg().noconvert() won't allow this to accept np.uint64 data type. + py::arg(), py::return_value_policy::move); + + m->def( + "create_float", [](float data) { return MakePacket(data); }, + R"doc(Create a MediaPipe float Packet from a float. + + Args: + data: A float or a np.float. + + Returns: + A MediaPipe float Packet. + + Raises: + TypeError: If the input is neither a float nor a np.float. + + Examples: + packet = mp.packet_creator.create_float(0.1) + data = mp.packet_getter.get_float(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_double", [](double data) { return MakePacket(data); }, + R"doc(Create a MediaPipe double Packet from a float. + + Args: + data: A float or a np.double. + + Returns: + A MediaPipe double Packet. + + Raises: + TypeError: If the input is neither a float nore a np.double. + + Examples: + packet = mp.packet_creator.create_double(0.1) + data = mp.packet_getter.get_float(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int_array", + [](const std::vector& data) { + int* ints = new int[data.size()]; + std::copy(data.begin(), data.end(), ints); + return Adopt(reinterpret_cast(ints)); + }, + R"doc(Create a MediaPipe int array Packet from a list of integers. + + Args: + data: A list of integers. + + Returns: + A MediaPipe int array Packet. + + Raises: + TypeError: If the input is not a list of integers. + + Examples: + packet = mp.packet_creator.create_int_array([1, 2, 3]) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_float_array", + [](const std::vector& data) { + float* floats = new float[data.size()]; + std::copy(data.begin(), data.end(), floats); + return Adopt(reinterpret_cast(floats)); + }, + R"doc(Create a MediaPipe float array Packet from a list of floats. + + Args: + data: A list of floats. + + Returns: + A MediaPipe float array Packet. + + Raises: + TypeError: If the input is not a list of floats. + + Examples: + packet = mp.packet_creator.create_float_array([0.1, 0.2, 0.3]) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_int_vector", + [](const std::vector& data) { + return MakePacket>(data); + }, + R"doc(Create a MediaPipe int vector Packet from a list of integers. + + Args: + data: A list of integers. + + Returns: + A MediaPipe int vector Packet. + + Raises: + TypeError: If the input is not a list of integers. + + Examples: + packet = mp.packet_creator.create_int_vector([1, 2, 3]) + data = mp.packet_getter.get_int_vector(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_float_vector", + [](const std::vector& data) { + return MakePacket>(data); + }, + R"doc(Create a MediaPipe float vector Packet from a list of floats. + + Args: + data: A list of floats + + Returns: + A MediaPipe float vector Packet. 
+ + Raises: + TypeError: If the input is not a list of floats. + + Examples: + packet = mp.packet_creator.create_float_vector([0.1, 0.2, 0.3]) + data = mp.packet_getter.get_float_list(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_string_vector", + [](const std::vector& data) { + return MakePacket>(data); + }, + R"doc(Create a MediaPipe std::string vector Packet from a list of str. + + Args: + data: A list of str. + + Returns: + A MediaPipe std::string vector Packet. + + Raises: + TypeError: If the input is not a list of str. + + Examples: + packet = mp.packet_creator.create_string_vector(['a', 'b', 'c']) + data = mp.packet_getter.get_str_list(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_packet_vector", + [](const std::vector& data) { + return MakePacket>(data); + }, + R"doc(Create a MediaPipe Packet holds a vector of packets. + + Args: + data: A list of packets. + + Returns: + A MediaPipe Packet holds a vector of packets. + + Raises: + TypeError: If the input is not a list of packets. + + Examples: + packet = mp.packet_creator.create_packet_vector([ + mp.packet_creator.create_float(0.1), + mp.packet_creator.create_int(1), + mp.packet_creator.create_string('1') + ]) + data = mp.packet_getter.get_packet_vector(packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_string_to_packet_map", + [](const std::map& data) { + return MakePacket>(data); + }, + R"doc(Create a MediaPipe std::string to packet map Packet from a dictionary. + + Args: + data: A dictionary that has (str, Packet) pairs. + + Returns: + A MediaPipe Packet holds std::map. + + Raises: + TypeError: If the input is not a dictionary from str to packet. + + Examples: + dict_packet = mp.packet_creator.create_string_to_packet_map({ + 'float': mp.packet_creator.create_float(0.1), + 'int': mp.packet_creator.create_int(1), + 'std::string': mp.packet_creator.create_string('1') + data = mp.packet_getter.get_str_to_packet_dict(dict_packet) +)doc", + py::arg().noconvert(), py::return_value_policy::move); + + m->def( + "create_matrix", + // Eigen Map class + // (https://eigen.tuxfamily.org/dox/group__TutorialMapClass.html) is the + // way to reuse the external memory as an Eigen type. However, when + // creating an Eigen::MatrixXf from an Eigen Map object, the data copy + // still happens. We can make a packet of an Eigen Map type for reusing + // external memory. However,the packet data type is no longer + // Eigen::MatrixXf. + // TODO: Should take "const Eigen::Ref&" + // as the input argument. Investigate why bazel non-optimized mode + // triggers a memory allocation bug in Eigen::internal::aligned_free(). + [](const Eigen::MatrixXf& matrix) { + // MakePacket copies the data. + return MakePacket(matrix); + }, + R"doc(Create a MediaPipe Matrix Packet from a 2d numpy float ndarray. + + The method copies data from the input MatrixXf and the returned packet owns + a MatrixXf object. + + Args: + matrix: A 2d numpy float ndarray. + + Returns: + A MediaPipe Matrix Packet. + + Raises: + TypeError: If the input is not a 2d numpy float ndarray. 
+ + Examples: + packet = mp.packet_creator.create_matrix( + np.array([[.1, .2, .3], [.4, .5, .6]]) + matrix = mp.packet_getter.get_matrix(packet) +)doc", + py::return_value_policy::move); +} + +void InternalPacketCreators(pybind11::module* m) { + m->def( + "_create_image_frame_with_copy", + [](mediapipe::ImageFormat::Format format, const py::array& data) { + return CreateImageFramePacket(format, data); + }, + py::arg("format"), py::arg("data").noconvert(), + py::return_value_policy::move); + + m->def( + "_create_image_frame_with_reference", + [](mediapipe::ImageFormat::Format format, const py::array& data) { + throw RaisePyError( + PyExc_NotImplementedError, + "Creating image frame packet with reference is not supproted yet."); + }, + py::arg("format"), py::arg("data").noconvert(), + py::return_value_policy::move); + + m->def( + "_create_image_frame_with_copy", + [](ImageFrame& image_frame) { + auto image_frame_copy = absl::make_unique(); + // Set alignment_boundary to kGlDefaultAlignmentBoundary so that + // both GPU and CPU can process it. + image_frame_copy->CopyFrom(image_frame, + ImageFrame::kGlDefaultAlignmentBoundary); + return Adopt(image_frame_copy.release()); + }, + py::arg("image_frame").noconvert(), py::return_value_policy::move); + + m->def( + "_create_image_frame_with_reference", + [](ImageFrame& image_frame) { + throw RaisePyError( + PyExc_NotImplementedError, + "Creating image frame packet with reference is not supproted yet."); + }, + py::arg("image_frame").noconvert(), py::return_value_policy::move); + + m->def( + "_create_proto", + [](const std::string& type_name, const py::bytes& serialized_proto) { + using packet_internal::HolderBase; + mediapipe::StatusOr> maybe_holder = + packet_internal::MessageHolderRegistry::CreateByName(type_name); + if (!maybe_holder.ok()) { + throw RaisePyError( + PyExc_RuntimeError, + absl::StrCat("Unregistered proto message type: ", type_name) + .c_str()); + } + // Creates a Packet with the concrete C++ payload type. + std::unique_ptr message_holder = + std::move(maybe_holder).ValueOrDie(); + auto* copy = const_cast( + message_holder->GetProtoMessageLite()); + copy->ParseFromString(serialized_proto); + return packet_internal::Create(message_holder.release()); + }, + py::return_value_policy::move); + + m->def( + "_create_proto_vector", + [](const std::string& type_name, + const std::vector& serialized_proto_vector) { + // TODO: Implement this. + throw RaisePyError(PyExc_NotImplementedError, + "Creating a packet from a vector of proto messages " + "is not supproted yet."); + return Packet(); + }, + py::return_value_policy::move); +} + +void PacketCreatorSubmodule(pybind11::module* module) { + py::module m = module->def_submodule( + "_packet_creator", "MediaPipe internal packet creator module."); + PublicPacketCreators(&m); + InternalPacketCreators(&m); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/packet_creator.h b/mediapipe/python/pybind/packet_creator.h new file mode 100644 index 000000000..a35b382fa --- /dev/null +++ b/mediapipe/python/pybind/packet_creator.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_PACKET_CREATOR_H_ +#define MEDIAPIPE_PYTHON_PYBIND_PACKET_CREATOR_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void PacketCreatorSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_PACKET_CREATOR_H_ diff --git a/mediapipe/python/pybind/packet_getter.cc b/mediapipe/python/pybind/packet_getter.cc new file mode 100644 index 000000000..57411a54a --- /dev/null +++ b/mediapipe/python/pybind/packet_getter.cc @@ -0,0 +1,395 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/packet_getter.h" + +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/timestamp.h" +#include "mediapipe/python/pybind/image_frame_util.h" +#include "mediapipe/python/pybind/util.h" +#include "pybind11/eigen.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace mediapipe { +namespace python { +namespace { + +template +const T& GetContent(const Packet& packet) { + RaisePyErrorIfNotOk(packet.ValidateAsType()); + return packet.Get(); +} + +} // namespace + +namespace py = pybind11; + +void PublicPacketGetters(pybind11::module* m) { + m->def("get_str", &GetContent, + R"doc(Get the content of a MediaPipe std::string Packet as a str. + + Args: + packet: A MediaPipe std::string Packet. + + Returns: + A str. + + Raises: + ValueError: If the Packet doesn't contain std::string data. + + Examples: + packet = mp.packet_creator.create_string('abc') + data = mp.packet_getter.get_str(packet) +)doc"); + + m->def( + "get_bytes", + [](const Packet& packet) { + return py::bytes(GetContent(packet)); + }, + R"doc(Get the content of a MediaPipe std::string Packet as a bytes object. + + Args: + packet: A MediaPipe std::string Packet. + + Returns: + A bytes object. + + Raises: + ValueError: If the Packet doesn't contain std::string data. + + Examples: + packet = mp.packet_creator.create_string(b'\xd0\xd0\xd0') + data = mp.packet_getter.get_bytes(packet) +)doc"); + + m->def("get_bool", &GetContent, + R"doc(Get the content of a MediaPipe bool Packet as a boolean. + + Args: + packet: A MediaPipe bool Packet. + + Returns: + A boolean. + + Raises: + ValueError: If the Packet doesn't contain bool data. 
+ + Examples: + packet = mp.packet_creator.create_bool(True) + data = mp.packet_getter.get_bool(packet) +)doc"); + + m->def( + "get_int", + [](const Packet& packet) { + if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } + throw RaisePyError( + PyExc_ValueError, + "Packet doesn't contain int, int8, int16, int32, or int64 data."); + }, + R"doc(Get the content of a MediaPipe int Packet as an integer. + + Args: + packet: A MediaPipe Packet that holds int, int8, int16, int32, or int64 data. + + Returns: + An integer. + + Raises: + ValueError: If the Packet doesn't contain int, int8, int16, int32, or int64 data. + + Examples: + packet = mp.packet_creator.create_int(0) + data = mp.packet_getter.get_int(packet) +)doc"); + + m->def( + "get_uint", + [](const Packet& packet) { + if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } + throw RaisePyError( + PyExc_ValueError, + "Packet doesn't contain uint8, uint16, uint32, or uint64 data."); + }, + R"doc(Get the content of a MediaPipe uint Packet as an integer. + + Args: + packet: A MediaPipe Packet that holds uint8, uint16, uint32, or uint64 data. + + Raises: + ValueError: If the Packet doesn't contain uint8, uint16, uint32, or uint64 data. + + Returns: + An integer. + + Examples: + packet = mp.packet_creator.create_uint8(2**8 - 1) + data = mp.packet_getter.get_uint(packet) +)doc"); + + m->def( + "get_float", + [](const Packet& packet) { + if (packet.ValidateAsType().ok()) { + return packet.Get(); + } else if (packet.ValidateAsType().ok()) { + return static_cast(packet.Get()); + } + throw RaisePyError(PyExc_ValueError, + "Packet doesn't contain float or double data."); + }, + R"doc(Get the content of a MediaPipe float or double Packet as a float. + + Args: + packet: A MediaPipe Packet that holds float or double data. + + Raises: + ValueError: If the Packet doesn't contain float or double data. + + Returns: + A float. + + Examples: + packet = mp.packet_creator.create_float(0.1) + data = mp.packet_getter.get_float(packet) +)doc"); + + m->def( + "get_int_list", &GetContent>, + R"doc(Get the content of a MediaPipe int vector Packet as an integer list. + + Args: + packet: A MediaPipe Packet that holds std:vector. + + Returns: + An integer list. + + Raises: + ValueError: If the Packet doesn't contain std:vector. + + Examples: + packet = mp.packet_creator.create_int_vector([1, 2, 3]) + data = mp.packet_getter.get_int_list(packet) +)doc"); + + m->def( + "get_float_list", &GetContent>, + R"doc(Get the content of a MediaPipe float vector Packet as a float list. + + Args: + packet: A MediaPipe Packet that holds std:vector. + + Returns: + A float list. + + Raises: + ValueError: If the Packet doesn't contain std:vector. 
+ + Examples: + packet = packet_creator.create_float_vector([0.1, 0.2, 0.3]) + data = packet_getter.get_float_list(packet) +)doc"); + + m->def( + "get_str_list", &GetContent>, + R"doc(Get the content of a MediaPipe std::string vector Packet as a str list. + + Args: + packet: A MediaPipe Packet that holds std:vector. + + Returns: + A str list. + + Raises: + ValueError: If the Packet doesn't contain std:vector. + + Examples: + packet = mp.packet_creator.create_string_vector(['a', 'b', 'c']) + data = mp.packet_getter.get_str_list(packet) +)doc"); + + m->def( + "get_packet_list", &GetContent>, + R"doc(Get the content of a MediaPipe Packet of Packet vector as a Packet list. + + Args: + packet: A MediaPipe Packet that holds std:vector. + + Returns: + A Packet list. + + Raises: + ValueError: If the Packet doesn't contain std:vector. + + Examples: + packet = mp.packet_creator.create_packet_vector([ + packet_creator.create_float(0.1), + packet_creator.create_int(1), + packet_creator.create_string('1') + ]) + packet_list = mp.packet_getter.get_packet_list(packet) +)doc"); + + m->def( + "get_str_to_packet_dict", &GetContent>, + + R"doc(Get the content of a MediaPipe Packet as a dictionary that has (str, Packet) pairs. + + Args: + packet: A MediaPipe Packet that holds std::map. + + Returns: + A dictionary that has (str, Packet) pairs. + + Raises: + ValueError: If the Packet doesn't contain std::map. + + Examples: + dict_packet = mp.packet_creator.create_string_to_packet_map({ + 'float': packet_creator.create_float(0.1), + 'int': packet_creator.create_int(1), + 'std::string': packet_creator.create_string('1') + data = mp.packet_getter.get_str_to_packet_dict(dict_packet) +)doc"); + + m->def( + "get_image_frame", &GetContent, + R"doc(Get the content of a MediaPipe ImageFrame Packet as an ImageFrame object. + + Args: + packet: A MediaPipe ImageFrame Packet. + + Returns: + A MediaPipe ImageFrame object. + + Raises: + ValueError: If the Packet doesn't contain ImageFrame. + + Examples: + packet = packet_creator.create_image_frame(frame) + data = packet_getter.get_image_frame(packet) +)doc", + py::return_value_policy::reference_internal); + + m->def( + "get_matrix", + [](const Packet& packet) { + return Eigen::Ref(GetContent(packet)); + }, + R"doc(Get the content of a MediaPipe Matrix Packet as a numpy 2d float ndarray. + + Args: + packet: A MediaPipe Matrix Packet. + + Returns: + A numpy 2d float ndarray. + + Raises: + ValueError: If the Packet doesn't contain matrix data. 
+ + Examples: + packet = mp.packet_creator.create_matrix(2d_array) + data = mp.packet_getter.get_matrix(packet) +)doc", + py::return_value_policy::reference_internal); +} + +void InternalPacketGetters(pybind11::module* m) { + m->def( + "_get_proto_type_name", + [](const Packet& packet) { + return packet.GetProtoMessageLite().GetTypeName(); + }, + py::return_value_policy::move); + + m->def( + "_get_proto_vector_size", + [](Packet& packet) { + auto proto_vector = packet.GetVectorOfProtoMessageLitePtrs(); + RaisePyErrorIfNotOk(proto_vector.status()); + return proto_vector.ValueOrDie().size(); + }, + py::return_value_policy::move); + + m->def( + "_get_proto_vector_element_type_name", + [](Packet& packet) { + auto proto_vector = packet.GetVectorOfProtoMessageLitePtrs(); + RaisePyErrorIfNotOk(proto_vector.status()); + if (proto_vector.ValueOrDie().empty()) { + return std::string(); + } + return proto_vector.ValueOrDie()[0]->GetTypeName(); + }, + py::return_value_policy::move); + + m->def( + "_get_serialized_proto", + [](const Packet& packet) { + // By default, py::bytes is an extra copy of the original std::string + // object: https://github.com/pybind/pybind11/issues/1236 Howeover, when + // Pybind11 performs the C++ to Python transition, it only increases the + // py::bytes object's ref count. See the implmentation at line 1583 in + // "pybind11/cast.h". + return py::bytes(packet.GetProtoMessageLite().SerializeAsString()); + }, + py::return_value_policy::move); + + m->def( + "_get_serialized_proto_list", + [](Packet& packet) { + auto proto_vector = packet.GetVectorOfProtoMessageLitePtrs(); + RaisePyErrorIfNotOk(proto_vector.status()); + int size = proto_vector.ValueOrDie().size(); + std::vector results; + results.reserve(size); + for (const proto_ns::MessageLite* ptr : proto_vector.ValueOrDie()) { + results.push_back(py::bytes(ptr->SerializeAsString())); + } + return results; + }, + py::return_value_policy::move); +} + +void PacketGetterSubmodule(pybind11::module* module) { + py::module m = module->def_submodule( + "_packet_getter", "MediaPipe internal packet getter module."); + PublicPacketGetters(&m); + InternalPacketGetters(&m); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/packet_getter.h b/mediapipe/python/pybind/packet_getter.h new file mode 100644 index 000000000..82a486db5 --- /dev/null +++ b/mediapipe/python/pybind/packet_getter.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
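A short sketch of the matrix creator/getter pair defined above (it assumes the same `mediapipe.python` import alias used by the tests in this change, and passes a float32 array so no dtype conversion is needed):

    import numpy as np
    import mediapipe.python as mp

    # Round-trip a 2-D float array through the Matrix packet bindings.
    # create_matrix copies the data into an Eigen::MatrixXf held by the packet;
    # get_matrix returns a 2-D float ndarray referencing that matrix.
    m = np.array([[.1, .2, .3], [.4, .5, .6]], dtype=np.float32)
    packet = mp.packet_creator.create_matrix(m)
    out = mp.packet_getter.get_matrix(packet)
    assert out.shape == (2, 3)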
+ +#ifndef MEDIAPIPE_PYTHON_PYBIND_PACKET_GETTER_H_ +#define MEDIAPIPE_PYTHON_PYBIND_PACKET_GETTER_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void PacketGetterSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_PACKET_GETTER_H_ diff --git a/mediapipe/python/pybind/resource_util.cc b/mediapipe/python/pybind/resource_util.cc new file mode 100644 index 000000000..d724c1bbe --- /dev/null +++ b/mediapipe/python/pybind/resource_util.cc @@ -0,0 +1,47 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/python/pybind/resource_util.h" + +#include "absl/flags/declare.h" +#include "absl/flags/flag.h" + +ABSL_DECLARE_FLAG(std::string, resource_root_dir); + +namespace mediapipe { +namespace python { + +namespace py = pybind11; + +void ResourceUtilSubmodule(pybind11::module* module) { + py::module m = + module->def_submodule("resource_util", "MediaPipe resource util module."); + + m.def( + "set_resource_dir", + [](const std::string& str) { + absl::SetFlag(&FLAGS_resource_root_dir, str); + }, + R"doc(Set resource root directory where can find necessary graph resources such as model files and label maps. + + Args: + str: A UTF-8 str. + + Examples: + mp.resource_util.set_resource_dir('/path/to/resource') +)doc"); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/resource_util.h b/mediapipe/python/pybind/resource_util.h new file mode 100644 index 000000000..5f64c665a --- /dev/null +++ b/mediapipe/python/pybind/resource_util.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_RESOURCE_UTIL_H_ +#define MEDIAPIPE_PYTHON_PYBIND_RESOURCE_UTIL_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void ResourceUtilSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_RESOURCE_UTIL_H_ diff --git a/mediapipe/python/pybind/timestamp.cc b/mediapipe/python/pybind/timestamp.cc new file mode 100644 index 000000000..6d4b58a5a --- /dev/null +++ b/mediapipe/python/pybind/timestamp.cc @@ -0,0 +1,144 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/python/pybind/timestamp.h"
+
+#include "absl/strings/str_cat.h"
+#include "mediapipe/framework/timestamp.h"
+#include "mediapipe/python/pybind/util.h"
+#include "pybind11/pybind11.h"
+
+namespace mediapipe {
+namespace python {
+
+namespace py = pybind11;
+
+void TimestampSubmodule(pybind11::module* module) {
+  py::module m =
+      module->def_submodule("timestamp", "MediaPipe timestamp module.");
+
+  py::class_<Timestamp> timestamp(
+      m, "Timestamp",
+      R"doc(A class which represents a timestamp in the MediaPipe framework.
+
+  MediaPipe timestamps are in units of _microseconds_.
+  There are several special values (all these values must be constructed using
+  the static methods provided):
+    UNSET: The default initialization value, not generally valid when a
+           timestamp is required.
+    UNSTARTED: The timestamp before any valid timestamps. This is the input
+               timestamp during Calculator::Open().
+    PRESTREAM: A value for specifying that a packet contains "header" data
+               that should be processed before any other timestamp. Like
+               POSTSTREAM, if this value is sent then it must be the only
+               value that is sent on the stream.
+    MIN: The minimum range timestamp to see in Calculator::Process().
+         Any number of "range" timestamps can be sent over a stream,
+         provided that they are sent in monotonically increasing order.
+    MAX: The maximum range timestamp to see in Calculator::Process().
+    POSTSTREAM: A value for specifying that a packet pertains to the entire
+                stream. This "summary" timestamp occurs after all the "range"
+                timestamps. If this timestamp is sent on a stream, it must be
+                the only packet sent.
+    DONE: The timestamp after all valid timestamps.
+          This is the input timestamp during Calculator::Close().
+)doc"); + + timestamp.def(py::init()) + .def(py::init()) + .def_property_readonly("value", &Timestamp::Value) + .def_property_readonly_static( + "UNSET", [](py::object) { return Timestamp::Unset(); }) + .def_property_readonly_static( + "UNSTARTED", [](py::object) { return Timestamp::Unstarted(); }) + .def_property_readonly_static( + "PRESTREAM", [](py::object) { return Timestamp::PreStream(); }) + .def_property_readonly_static("MIN", + [](py::object) { return Timestamp::Min(); }) + .def_property_readonly_static("MAX", + [](py::object) { return Timestamp::Max(); }) + .def_property_readonly_static( + "POSTSTREAM", [](py::object) { return Timestamp::PostStream(); }) + .def_property_readonly_static( + "DONE", [](py::object) { return Timestamp::Done(); }) + .def("__eq__", + [](const Timestamp& a, const Timestamp& b) { return a == b; }) + .def("__lt__", + [](const Timestamp& a, const Timestamp& b) { return a < b; }) + .def("__gt__", + [](const Timestamp& a, const Timestamp& b) { return a > b; }) + .def("__le__", + [](const Timestamp& a, const Timestamp& b) { return a <= b; }) + .def("__ge__", + [](const Timestamp& a, const Timestamp& b) { return a >= b; }) + .def("__repr__", [](const Timestamp& self) { + return absl::StrCat(""); + }); + + timestamp.def("seconds", &Timestamp::Seconds, + R"doc(Return the value in units of seconds as a float.)doc"); + + timestamp.def( + "microseconds", &Timestamp::Microseconds, + R"doc(Return the value in units of microseconds as an int.)doc"); + + timestamp.def("is_special_value", &Timestamp::IsSpecialValue, + R"doc(Check if the timestamp is a special value, + + A special value is any of the values which cannot be constructed directly + but must be constructed using the static special value. + +)doc"); + + timestamp.def( + "is_range_value", &Timestamp::IsRangeValue, + R"doc(Check if the timestamp is a range value is anything between Min() and Max() (inclusive). + + Any number of packets with range values can be sent over a stream as long as + they are sent in monotonically increasing order. is_range_value() isn't + quite the opposite of is_special_value() since it is valid to start a stream + at Timestamp::Min() and continue until timestamp max (both of which are + special values). prestream and postStream are not considered a range value + even though they can be sent over a stream (they are "summary" timestamps not + "range" timestamps). +)doc"); + + timestamp.def( + "is_allowed_in_stream", &Timestamp::IsAllowedInStream, + R"doc(Returns true iff this can be the timestamp of a Packet in a stream. + + Any number of RangeValue timestamps may be in a stream (in monotonically + increasing order). Also, exactly one prestream, or one poststream packet is + allowed. +)doc"); + + timestamp.def_static("from_seconds", &Timestamp::FromSeconds, + R"doc(Create a timestamp from a seconds value + + Args: + seconds: A seconds value in float. + + Returns: + A MediaPipe Timestamp object. + + Examples: + timestamp_now = mp.Timestamp.from_seconds(time.time()) +)doc"); + + py::implicitly_convertible(); +} + +} // namespace python +} // namespace mediapipe diff --git a/mediapipe/python/pybind/timestamp.h b/mediapipe/python/pybind/timestamp.h new file mode 100644 index 000000000..028235f71 --- /dev/null +++ b/mediapipe/python/pybind/timestamp.h @@ -0,0 +1,28 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_TIMESTAMP_H_ +#define MEDIAPIPE_PYTHON_PYBIND_TIMESTAMP_H_ + +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +void TimestampSubmodule(pybind11::module* module); + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_TIMESTAMP_H_ diff --git a/mediapipe/python/pybind/util.h b/mediapipe/python/pybind/util.h new file mode 100644 index 000000000..11cec05f6 --- /dev/null +++ b/mediapipe/python/pybind/util.h @@ -0,0 +1,92 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_PYTHON_PYBIND_UTIL_H_ +#define MEDIAPIPE_PYTHON_PYBIND_UTIL_H_ + +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/timestamp.h" +#include "pybind11/pybind11.h" + +namespace mediapipe { +namespace python { + +namespace py = pybind11; + +inline py::error_already_set RaisePyError(PyObject* exc_class, + const char* message) { + PyErr_SetString(exc_class, message); + return py::error_already_set(); +} + +inline PyObject* StatusCodeToPyError(const ::absl::StatusCode& code) { + switch (code) { + case absl::StatusCode::kInvalidArgument: + return static_cast(PyExc_ValueError); + case absl::StatusCode::kAlreadyExists: + return static_cast(PyExc_FileExistsError); + case absl::StatusCode::kUnimplemented: + return static_cast(PyExc_NotImplementedError); + default: + return static_cast(PyExc_RuntimeError); + } +} + +inline void RaisePyErrorIfNotOk(const mediapipe::Status& status) { + if (!status.ok()) { + throw RaisePyError(StatusCodeToPyError(status.code()), + status.message().data()); + } +} + +inline void RaisePyErrorIfOverflow(int64 value, int64 min, int64 max) { + if (value > max) { + throw RaisePyError(PyExc_OverflowError, + absl::StrCat(value, " execeeds the maximum value (", max, + ") the data type can have.") + .c_str()); + } else if (value < min) { + throw RaisePyError(PyExc_OverflowError, + absl::StrCat(value, " goes below the minimum value (", + min, ") the data type can have.") + .c_str()); + } +} + +inline std::string TimestampValueString(const Timestamp& timestamp) { + if (timestamp == Timestamp::Unset()) { + return "UNSET"; + } else if (timestamp == Timestamp::Unstarted()) { + return "UNSTARTED"; + } else if (timestamp == Timestamp::PreStream()) { + return "PRESTREAM"; + } else if (timestamp == Timestamp::Min()) { + return "MIN"; + } else if (timestamp == Timestamp::Max()) { + return "MAX"; + } else if (timestamp == Timestamp::PostStream()) { + return "POSTSTREAM"; + } else if (timestamp == 
Timestamp::OneOverPostStream()) { + return "ONEOVERPOSTSTREAM"; + } else if (timestamp == Timestamp::Done()) { + return "DONE"; + } else { + return timestamp.DebugString(); + } +} + +} // namespace python +} // namespace mediapipe + +#endif // MEDIAPIPE_PYTHON_PYBIND_UTIL_H_ diff --git a/mediapipe/python/timestamp_test.py b/mediapipe/python/timestamp_test.py new file mode 100644 index 000000000..fc051d966 --- /dev/null +++ b/mediapipe/python/timestamp_test.py @@ -0,0 +1,75 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for mediapipe.python._framework_bindings.timestamp.""" + +import time + +from absl.testing import absltest +import mediapipe.python as mp + + +class TimestampTest(absltest.TestCase): + + def testTimesatmp(self): + t = mp.Timestamp(100) + self.assertEqual(t.value, 100) + self.assertEqual(t, 100) + self.assertEqual(str(t), '') + + def testTimestampCopyConstructor(self): + ts1 = mp.Timestamp(100) + ts2 = mp.Timestamp(ts1) + self.assertEqual(ts1, ts2) + + def testTimesatmpComparsion(self): + ts1 = mp.Timestamp(100) + ts2 = mp.Timestamp(100) + self.assertEqual(ts1, ts2) + ts3 = mp.Timestamp(200) + self.assertNotEqual(ts1, ts3) + + def testTimesatmpSpecialValues(self): + t1 = mp.Timestamp.UNSET + self.assertEqual(str(t1), '') + t2 = mp.Timestamp.UNSTARTED + self.assertEqual(str(t2), '') + t3 = mp.Timestamp.PRESTREAM + self.assertEqual(str(t3), '') + t4 = mp.Timestamp.MIN + self.assertEqual(str(t4), '') + t5 = mp.Timestamp.MAX + self.assertEqual(str(t5), '') + t6 = mp.Timestamp.POSTSTREAM + self.assertEqual(str(t6), '') + t7 = mp.Timestamp.DONE + self.assertEqual(str(t7), '') + + def testTimestampComparisons(self): + ts1 = mp.Timestamp(100) + ts2 = mp.Timestamp(101) + self.assertGreater(ts2, ts1) + self.assertGreaterEqual(ts2, ts1) + self.assertLess(ts1, ts2) + self.assertLessEqual(ts1, ts2) + self.assertNotEqual(ts1, ts2) + + def testFromSeconds(self): + now = time.time() + ts = mp.Timestamp.from_seconds(now) + self.assertAlmostEqual(now, ts.seconds(), delta=1) + + +if __name__ == '__main__': + absltest.main() diff --git a/mediapipe/util/tflite/operations/max_pool_argmax.cc b/mediapipe/util/tflite/operations/max_pool_argmax.cc index e87c8dd96..c55c16f5d 100644 --- a/mediapipe/util/tflite/operations/max_pool_argmax.cc +++ b/mediapipe/util/tflite/operations/max_pool_argmax.cc @@ -113,9 +113,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 2); TfLiteTensor* output = ::tflite::GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TfLiteTensor* indices = ::tflite::GetOutput(context, node, kIndicesTensor); + TF_LITE_ENSURE(context, indices != nullptr); const TfLiteTensor* input = ::tflite::GetInput(context, node, kDataInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(input), 4); TF_LITE_ENSURE_EQ(context, 
input->type, kTfLiteFloat32); TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); @@ -168,9 +171,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { reinterpret_cast(node->user_data); TfLiteTensor* output = ::tflite::GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TfLiteTensor* indices = ::tflite::GetOutput(context, node, kIndicesTensor); + TF_LITE_ENSURE(context, indices != nullptr); const TfLiteTensor* input = ::tflite::GetInput(context, node, kDataInputTensor); + TF_LITE_ENSURE(context, input != nullptr); float activation_min, activation_max; ::tflite::CalculateActivationRange(params->activation, &activation_min, diff --git a/mediapipe/util/tflite/operations/max_unpooling.cc b/mediapipe/util/tflite/operations/max_unpooling.cc index 078927dd4..ca35ebaab 100644 --- a/mediapipe/util/tflite/operations/max_unpooling.cc +++ b/mediapipe/util/tflite/operations/max_unpooling.cc @@ -70,10 +70,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1); TfLiteTensor* output = ::tflite::GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); const TfLiteTensor* input = ::tflite::GetInput(context, node, kDataInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* indices = ::tflite::GetInput(context, node, kIndicesTensor); + TF_LITE_ENSURE(context, indices != nullptr); TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(indices), 4); TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(input), 4); TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); @@ -107,10 +110,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { reinterpret_cast(node->user_data); TfLiteTensor* output = ::tflite::GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); const TfLiteTensor* input = ::tflite::GetInput(context, node, kDataInputTensor); + TF_LITE_ENSURE(context, input != nullptr); const TfLiteTensor* indices = ::tflite::GetInput(context, node, kIndicesTensor); + TF_LITE_ENSURE(context, indices != nullptr); float activation_min, activation_max; ::tflite::CalculateActivationRange(params->activation, &activation_min, diff --git a/mediapipe/util/tflite/operations/transpose_conv_bias.cc b/mediapipe/util/tflite/operations/transpose_conv_bias.cc index e25cdfc19..1fe5acd74 100644 --- a/mediapipe/util/tflite/operations/transpose_conv_bias.cc +++ b/mediapipe/util/tflite/operations/transpose_conv_bias.cc @@ -121,10 +121,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* weights = ::tflite::GetInput(context, node, kWeightsTensor); + TF_LITE_ENSURE(context, weights != nullptr); const TfLiteTensor* bias = ::tflite::GetInput(context, node, kBiasTensor); + TF_LITE_ENSURE(context, bias != nullptr); const TfLiteTensor* input = ::tflite::GetInput(context, node, kDataInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = ::tflite::GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(input), 4); TF_LITE_ENSURE_EQ(context, ::tflite::NumDimensions(weights), 4); @@ -184,10 +188,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* weights = ::tflite::GetInput(context, node, kWeightsTensor); + 
TF_LITE_ENSURE(context, weights != nullptr); const TfLiteTensor* bias = ::tflite::GetInput(context, node, kBiasTensor); + TF_LITE_ENSURE(context, bias != nullptr); const TfLiteTensor* input = ::tflite::GetInput(context, node, kDataInputTensor); + TF_LITE_ENSURE(context, input != nullptr); TfLiteTensor* output = ::tflite::GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); const auto* params = reinterpret_cast( node->custom_initial_data); diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..31b2a55c3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +absl-py==0.9.0 +numpy==1.19.1 +opencv-python==3.4.10.35 +protobuf>=3.11.4 +six==1.15.0 +wheel>=0.34.0 diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..17d81be60 --- /dev/null +++ b/setup.py @@ -0,0 +1,275 @@ +"""Copyright 2020 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Setup for MediaPipe package with setuptools. +""" + +from distutils import spawn +import distutils.command.build as build +import distutils.command.clean as clean +import glob +import os +import posixpath +import shutil +import subprocess +import sys + +import setuptools +import setuptools.command.build_ext as build_ext +import setuptools.command.install as install + +__version__ = '0.79' +MP_ROOT_PATH = os.path.dirname(os.path.abspath(__file__)) +ROOT_INIT_PY = os.path.join(MP_ROOT_PATH, '__init__.py') +if not os.path.exists(ROOT_INIT_PY): + open(ROOT_INIT_PY, 'w').close() + + +def _parse_requirements(path): + with open(os.path.join(MP_ROOT_PATH, path)) as f: + return [ + line.rstrip() + for line in f + if not (line.isspace() or line.startswith('#')) + ] + + +def _check_bazel(): + """Check Bazel binary as well as its version.""" + + if not spawn.find_executable('bazel'): + sys.stderr.write('could not find bazel executable. Please install bazel to' + 'build the MediaPipe Python package.') + sys.exit(-1) + try: + bazel_version_info = subprocess.check_output(['bazel', '--version']) + except subprocess.CalledProcessError: + sys.stderr.write('fail to get bazel version by $ bazel --version.') + bazel_version_info = bazel_version_info.decode('UTF-8').strip() + version = bazel_version_info.split('bazel ')[1].split('-')[0] + version_segments = version.split('.') + # Treat "0.24" as "0.24.0" + if len(version_segments) == 2: + version_segments.append('0') + for seg in version_segments: + if not seg.isdigit(): + sys.stderr.write('invalid bazel version number: %s\n' % version_segments) + sys.exit(-1) + bazel_version = int(''.join(['%03d' % int(seg) for seg in version_segments])) + if bazel_version < 2000000: + sys.stderr.write( + 'the current bazel version is older than the minimum version that MediaPipe can support. Please upgrade bazel.' 
+ ) + + +class GeneratePyProtos(build.build): + """Generate MediaPipe Python protobuf files by Protocol Compiler.""" + + def run(self): + if 'PROTOC' in os.environ and os.path.exists(os.environ['PROTOC']): + self._protoc = os.environ['PROTOC'] + else: + self._protoc = spawn.find_executable('protoc') + if self._protoc is None: + sys.stderr.write( + 'protoc is not found. Please run \'apt install -y protobuf' + '-compiler\' (linux) or \'brew install protobuf\'(macos) to install ' + 'protobuf compiler binary.') + sys.exit(-1) + # Build framework protos. + for proto_file in glob.glob( + 'mediapipe/framework/**/*.proto', recursive=True): + if proto_file.endswith('test.proto'): + continue + proto_dir = os.path.dirname(os.path.abspath(proto_file)) + if proto_dir.endswith('testdata'): + continue + init_py = os.path.join(proto_dir, '__init__.py') + if not os.path.exists(init_py): + sys.stderr.write('adding necessary __init__ file: %s\n' % init_py) + open(init_py, 'w').close() + self._generate_proto(proto_file) + + def _generate_proto(self, source): + """Invokes the Protocol Compiler to generate a _pb2.py.""" + + output = source.replace('.proto', '_pb2.py') + sys.stderr.write('generating proto file: %s\n' % output) + if (not os.path.exists(output) or + (os.path.exists(source) and + os.path.getmtime(source) > os.path.getmtime(output))): + + if not os.path.exists(source): + sys.stderr.write('cannot find required file: %s\n' % source) + sys.exit(-1) + + protoc_command = [self._protoc, '-I.', '--python_out=.', source] + if subprocess.call(protoc_command) != 0: + sys.exit(-1) + + +class BuildBinaryGraphs(build.build): + """Build binary graphs for Python examples.""" + + def run(self): + _check_bazel() + binary_graphs = ['pose_tracking/upper_body_pose_tracking_cpu_binary_graph'] + for binary_graph in binary_graphs: + sys.stderr.write('generating binarypb: %s\n' % + os.path.join('mediapipe/graphs/', binary_graph)) + self._generate_binary_graph(binary_graph) + + def _generate_binary_graph(self, graph_path): + """Generate binary graph for a particular MediaPipe binary graph target.""" + + bazel_command = [ + 'bazel', + 'build', + '--compilation_mode=opt', + '--define=MEDIAPIPE_DISABLE_GPU=1', + os.path.join('mediapipe/graphs/', graph_path), + ] + if subprocess.call(bazel_command) != 0: + sys.exit(-1) + output_name = graph_path.replace('_binary_graph', '.binarypb') + output_file = os.path.join('mediapipe/graphs', output_name) + shutil.copyfile( + os.path.join('bazel-bin/mediapipe/graphs/', output_name), output_file) + + +class BazelExtension(setuptools.Extension): + """A C/C++ extension that is defined as a Bazel BUILD target.""" + + def __init__(self, bazel_target, target_name=''): + self.bazel_target = bazel_target + self.relpath, self.target_name = ( + posixpath.relpath(bazel_target, '//').split(':')) + if target_name: + self.target_name = target_name + ext_name = os.path.join( + self.relpath.replace(posixpath.sep, os.path.sep), self.target_name) + setuptools.Extension.__init__(self, ext_name, sources=[]) + + +class BuildBazelExtension(build_ext.build_ext): + """A command that runs Bazel to build a C/C++ extension.""" + + def run(self): + _check_bazel() + for ext in self.extensions: + self.bazel_build(ext) + build_ext.build_ext.run(self) + + def bazel_build(self, ext): + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + bazel_argv = [ + 'bazel', + 'build', + '--compilation_mode=opt', + '--define=MEDIAPIPE_DISABLE_GPU=1', + '--action_env=PYTHON_BIN_PATH=' + sys.executable, + 
str(ext.bazel_target + '.so'), + ] + self.spawn(bazel_argv) + ext_bazel_bin_path = os.path.join('bazel-bin', ext.relpath, + ext.target_name + '.so') + ext_dest_path = self.get_ext_fullpath(ext.name) + ext_dest_dir = os.path.dirname(ext_dest_path) + if not os.path.exists(ext_dest_dir): + os.makedirs(ext_dest_dir) + shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + + +class Build(build.build): + """Build command that builds binary graphs and extension and does a cleanup afterwards.""" + + def run(self): + self.run_command('build_binary_graphs') + self.run_command('build_ext') + build.build.run(self) + self.run_command('remove_generated') + + +class Install(install.install): + """Install command that builds binary graphs and extension and does a cleanup afterwards.""" + + def run(self): + self.run_command('build_binary_graphs') + self.run_command('build_ext') + install.install.run(self) + self.run_command('remove_generated') + + +class RemoveGenerated(clean.clean): + """Remove the generated files.""" + + def run(self): + for py_file in glob.glob('mediapipe/framework/**/*.py', recursive=True): + sys.stderr.write('removing generated files: %s\n' % py_file) + os.remove(py_file) + for binarypb_file in glob.glob( + 'mediapipe/graphs/**/*.binarypb', recursive=True): + sys.stderr.write('removing generated binary graphs: %s\n' % binarypb_file) + os.remove(binarypb_file) + clean.clean.run(self) + + +setuptools.setup( + name='mediapipe', + version=__version__, + url='https://github.com/google/mediapipe', + description='MediaPipe is the simplest way for researchers and developers to build world-class ML solutions and applications for mobile, edge, cloud and the web.', + author='Mediapipe Authors', + author_email='mediapipe@google.com', + long_description=open(os.path.join(MP_ROOT_PATH, 'README.md')).read(), + long_description_content_type='text/markdown', + packages=setuptools.find_packages(exclude=['mediapipe.examples.desktop.*']), + install_requires=_parse_requirements('requirements.txt'), + cmdclass={ + 'build': Build, + 'gen_protos': GeneratePyProtos, + 'build_binary_graphs': BuildBinaryGraphs, + 'build_ext': BuildBazelExtension, + 'install': Install, + 'remove_generated': RemoveGenerated, + }, + ext_modules=[ + BazelExtension('//mediapipe/python:_framework_bindings'), + ], + zip_safe=False, + include_package_data=True, + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3 :: Only', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Software Development', + 'Topic :: Software Development :: Libraries', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + license='Apache 2.0', + keywords='mediapipe', +) + +os.remove(ROOT_INIT_PY)
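Once the package is built and installed with the setup.py above (for example `python3 setup.py gen_protos` followed by `python3 setup.py install`, with bazel and protoc on PATH), a quick smoke test of the bindings could look like the following sketch; the resource directory path is a placeholder, as in the resource_util docstring:

    import time
    import mediapipe.python as mp

    # Optional: tell the framework where to find graph resources such as
    # model files and label maps.
    mp.resource_util.set_resource_dir('/path/to/resource')

    # Exercise the Timestamp bindings from the installed extension module.
    ts = mp.Timestamp.from_seconds(time.time())
    assert ts.is_range_value()
    print(ts.seconds())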