diff --git a/BUILD b/BUILD.bazel
similarity index 94%
rename from BUILD
rename to BUILD.bazel
index f225f24e3..1973f98af 100644
--- a/BUILD
+++ b/BUILD.bazel
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-licenses(["notice"])  # Apache 2.0
+licenses(["notice"])
 
 exports_files(["LICENSE"])
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3703a7014..d7a1e1877 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,15 +1,17 @@
 # Contributing guidelines
 
-## Pull Request Checklist
+## What types of pull requests do we accept into the MediaPipe repository?
+
+* Bug fixes
+* Documentation fixes
+
+For new feature additions (e.g., new graphs and calculators), we are currently not planning to accept new feature pull requests into the MediaPipe repository. Instead, we would like contributors to create their own repositories for the new feature and list them at [Awesome MediaPipe](https://mediapipe.org). This lets contributors get their code out to the community more quickly.
 
 Before sending your pull requests, make sure you followed this list.
 
 - Read [contributing guidelines](CONTRIBUTING.md).
 - Read [Code of Conduct](CODE_OF_CONDUCT.md).
 - Ensure you have signed the [Contributor License Agreement (CLA)](https://cla.developers.google.com/).
-- Check if my changes are consistent with the [guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md#general-guidelines-and-philosophy-for-contribution).
-- Changes are consistent with the [Coding Style](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md#c-coding-style).
-- Run [Unit Tests](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md#running-unit-tests).
 
 ## How to become a contributor and submit your own code
 
@@ -28,100 +30,7 @@ Follow either of the two links above to access the appropriate CLA and instructi
 
 ### Contributing code
 
-If you have improvements to MediaPipe, send us your pull requests! For those
+If you have bug or documentation fixes for MediaPipe, send us your pull requests! For those
 just getting started, GitHub has a
 [howto](https://help.github.com/articles/using-pull-requests/).
 
-MediaPipe team members will be assigned to review your pull requests. Once the
-pull requests are approved and pass continuous integration checks, a MediaPipe
-team member will apply `ready to pull` label to your change. This means we are
-working on getting your pull request submitted to our internal repository. After
-the change has been submitted internally, your pull request will be merged
-automatically on GitHub.
-
-If you want to contribute but you're not sure where to start, take a look at the
-[issues with the "contributions welcome" label](https://github.com/google/mediapipe/labels/stat%3Acontributions%20welcome).
-These are issues that we believe are particularly well suited for outside
-contributions, often because we probably won't get to them right now. If you
-decide to start on an issue, leave a comment so that other people know that
-you're working on it. If you want to help out, but not alone, use the issue
-comment thread to coordinate.
-
-### Contribution guidelines and standards
-
-Before sending your pull request for
-[review](https://github.com/google/mediapipe/pulls),
-make sure your changes are consistent with the guidelines and follow the
-MediaPipe coding style.
- -#### General guidelines and philosophy for contribution - -* Include unit tests when you contribute new features, as they help to a) - prove that your code works correctly, and b) guard against future breaking - changes to lower the maintenance cost. -* Bug fixes also generally require unit tests, because the presence of bugs - usually indicates insufficient test coverage. -* Keep API compatibility in mind when you change code in MediaPipe framework - e.g., code in - [mediapipe/framework](https://github.com/google/mediapipe/tree/master/mediapipe/framework) - and - [mediapipe/calculators](https://github.com/google/mediapipe/tree/master/mediapipe/calculators). - Once MediaPipe has reached version 1 and we will not make - non-backward-compatible API changes without a major release. Reviewers of - your pull request will comment on any API compatibility issues. -* When you contribute a new feature to MediaPipe, the maintenance burden is - (by default) transferred to the MediaPipe team. This means that benefit of - the contribution must be compared against the cost of maintaining the - feature. -* Full new features (e.g., a new op implementing a cutting-edge algorithm) - typically will live in - [mediapipe/addons](https://github.com/google/mediapipe/addons) to get some - airtime before decision is made regarding whether they are to be migrated to - the core. - -#### License - -Include a license at the top of new files. - -* [C/C++ license example](https://github.com/google/mediapipe/blob/master/mediapipe/framework/calculator_base.cc#L1) -* [Java license example](https://github.com/google/mediapipe/blob/master/mediapipe/java/com/google/mediapipe/components/CameraHelper.java) - -Bazel BUILD files also need to include a license section, e.g., -[BUILD example](https://github.com/google/mediapipe/blob/master/mediapipe/framework/BUILD#L61). - -#### C++ coding style - -Changes to MediaPipe C++ code should conform to -[Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). - -Use `clang-tidy` to check your C/C++ changes. To install `clang-tidy` on ubuntu:16.04, do: - -```bash -apt-get install -y clang-tidy -``` - -You can check a C/C++ file by doing: - - -```bash -clang-format --style=google > /tmp/my_cc_file.cc -diff /tmp/my_cc_file.cc -``` - -#### Coding style for other languages - -* [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html) -* [Google JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html) -* [Google Shell Style Guide](https://google.github.io/styleguide/shell.xml) -* [Google Objective-C Style Guide](https://google.github.io/styleguide/objcguide.html) - -#### Running sanity check - -If you have Docker installed on your system, you can perform a sanity check on -your changes by running the command: - -```bash -mediapipe/tools/ci_build/ci_build.sh CPU mediapipe/tools/ci_build/ci_sanity.sh -``` - -This will catch most license, Python coding style and BUILD file issues that -may exist in your changes. +MediaPipe team members will be assigned to review your pull requests. Once the bug/documentation fixes are verified, a MediaPipe team member will acknowledge your contribution in the pull request comments, manually merge the fixes into our internal codebase upstream, and apply the `to be closed` label to the pull request. These fixes will later be pushed to GitHub in the next release, and a MediaPipe team member will then close the pull request. 
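For contributors who have not sent a GitHub pull request before, the flow referenced by the howto above looks roughly like the following. This is only a sketch: the fork URL, branch name, and commit message are placeholders, not MediaPipe conventions.

```bash
# Fork google/mediapipe on GitHub first, then clone your fork (placeholder URL).
git clone https://github.com/<your-username>/mediapipe.git
cd mediapipe

# Work on a topic branch (placeholder name).
git checkout -b fix-docs-typo

# Edit files, then commit and push to your fork.
git commit -am "Fix typo in docs"   # placeholder message
git push origin fix-docs-typo

# Finally, open a pull request against google/mediapipe on GitHub and sign the
# CLA if prompted.
```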
diff --git a/README.md b/README.md index 66323f988..cef6213dd 100644 --- a/README.md +++ b/README.md @@ -22,13 +22,13 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Hands | Hair Segmentation -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :---------------: -[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) +Face Detection | Face Mesh | Iris | Hands +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---: +[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) -Object Detection | Box Tracking | Objectron | KNIFT -:----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT +:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | 
:-------------------------------------------------------------------------------------------------------------------: | :---: +[![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) @@ -37,6 +37,7 @@ Object Detection :---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | ✅ | [Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | ✅ | [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | ✅ @@ -63,6 +64,8 @@ never leaves your device. ![visualizer_runner](docs/images/visualizer_runner.png) * [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection) +* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking) +* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth) * [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking) * [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection) * [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation) @@ -83,6 +86,8 @@ run code search using ## Publications +* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation from a Single + Image](https://mediapipe.page.link/iris-blog) in Google AI Blog * [MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html) in Google Developers Blog * [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html) @@ -123,7 +128,7 @@ run code search using * [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome MediaPipe related frameworks, libraries and software -* [Slack community](https://https://mediapipe.page.link/joinslack) for MediaPipe users +* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users * [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe diff --git a/docs/data/visualizer/sample_trace.binarypb b/docs/data/visualizer/sample_trace.binarypb index fce47d8e5..fc2562934 100644 Binary files a/docs/data/visualizer/sample_trace.binarypb and b/docs/data/visualizer/sample_trace.binarypb differ diff --git a/docs/framework_concepts/calculators.md b/docs/framework_concepts/calculators.md index 66aefb7b1..0ee3473e6 100644 --- a/docs/framework_concepts/calculators.md +++ b/docs/framework_concepts/calculators.md @@ -207,8 +207,8 @@ class 
SomeAudioVideoCalculator : public CalculatorBase { // particular type. SetAny() has the same effect as explicitly // setting the type to be the stream's type. cc->Outputs().Tag("VIDEO").Set(); - cc->Outputs().Get("AUDIO", 0).Set; - cc->Outputs().Get("AUDIO", 1).Set; + cc->Outputs().Get("AUDIO", 0).Set(); + cc->Outputs().Get("AUDIO", 1).Set(); return ::mediapipe::OkStatus(); } ``` @@ -400,13 +400,8 @@ node { ``` The diagram below shows how the `PacketClonerCalculator` defines its output -packets based on its series of input packets. +packets (bottom) based on its series of input packets (top). -| ![Graph using | -: PacketClonerCalculator](../images/packet_cloner_calculator.png) : -| :--------------------------------------------------------------------------: | -| *Each time it receives a packet on its TICK input stream, the | -: PacketClonerCalculator outputs the most recent packet from each of its input : -: streams. The sequence of output packets is determined by the sequene of : -: input packets and their timestamps. The timestamps are shows along the right : -: side of the diagram.* : +| ![Graph using PacketClonerCalculator](../images/packet_cloner_calculator.png) | +| :---------------------------------------------------------------------------: | +| *Each time it receives a packet on its TICK input stream, the PacketClonerCalculator outputs the most recent packet from each of its input streams. The sequence of output packets (bottom) is determined by the sequence of input packets (top) and their timestamps. The timestamps are shown along the right side of the diagram.* | diff --git a/docs/framework_concepts/packets.md b/docs/framework_concepts/packets.md index bb0b61d6a..bdf11c69f 100644 --- a/docs/framework_concepts/packets.md +++ b/docs/framework_concepts/packets.md @@ -20,7 +20,7 @@ Packets are generally created with `MediaPipe::Adopt()` (from packet.h). ```c++ // Create some data. -auto data = gtl::MakeUnique("constructor_argument"); +auto data = absl::make_unique("constructor_argument"); // Create a packet to own the data. Packet p = Adopt(data.release()); // Make a new packet with the same data and a different timestamp. diff --git a/docs/getting_started/building_examples.md b/docs/getting_started/building_examples.md index 2c3b6e77c..be50f9bc2 100644 --- a/docs/getting_started/building_examples.md +++ b/docs/getting_started/building_examples.md @@ -184,8 +184,8 @@ app: ### Prerequisite -1. Install [Xcode](https://developer.apple.com/xcode/), and additionally - install the Command Line Tools by: +1. Install [Xcode](https://developer.apple.com/xcode/), then install the + Command Line Tools using: ```bash xcode-select --install @@ -196,74 +196,38 @@ app: We recommend using [Homebrew](https://brew.sh/) to get the latest version. 3. Set Python 3.7 as the default Python version and install the Python "six" - library. - - To make Mediapipe work with TensorFlow, please set Python 3.7 as the default - Python version and install the Python "six" library. + library. This is needed for TensorFlow. ```bash pip3 install --user six ``` -4. Follow - [Apple's instructions](https://developer.apple.com/support/certificates/) to - obtain the required development certificates and provisioning profiles for - your iOS device. - - Tip: You can the following command to see the provisioning profiles you have - previously downloaded using Xcode: `open - ~/Library/MobileDevice/"Provisioning Profiles"`. 
If there are none, generate - and download a profile on - [Apple's developer site](https://developer.apple.com/account/resources/). - -5. Clone the MediaPipe repository. +4. Clone the MediaPipe repository. ```bash git clone https://github.com/google/mediapipe.git ``` -6. In the cloned MediaPipe repository, symlink or copy your provisioning profile - to `mediapipe/provisioning_profile.mobileprovision`, e.g., +### Set up a bundle ID prefix - ```bash - cd mediapipe - ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision - ``` +All iOS apps must have a bundle ID, and you must have a provisioning profile +that lets you install an app with that ID onto your phone. To avoid clashes +between different MediaPipe users, you need to configure a unique prefix for the +bundle IDs of our iOS demo apps. -### Option 1: Build with Bazel in Command Line +If you have a custom provisioning profile, see +[Custom provisioning](#custom-provisioning) below. -1. Modify the `bundle_id` field of the app's `ios_application` build target to - use your own identifier. For instance, for - [MediaPipe Hands](../solutions/hands.md), the `bundle_id` is in the - `HandTrackingGpuApp` target in the - [BUILD](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/BUILD) - file. +Otherwise, run this command to generate a unique prefix: -2. Again using [MediaPipe Hands](../solutions/hands.md) for example, run: +```bash +python3 mediapipe/examples/ios/link_local_profiles.py +``` - ```bash - bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp - ``` +### Create an Xcode project - You may see a permission request from `codesign` in order to sign the app. - - Tip: You can run this - [script](https://github.com/google/mediapipe/blob/master/build_ios_examples.sh) - to build all MediaPipe iOS example apps. - -3. In Xcode, open the `Devices and Simulators` window (command-shift-2). - -4. Make sure your device is connected. You will see a list of installed apps. - Press the "+" button under the list, and select the `.ipa` file built by - Bazel. - -5. You can now run the app on your device. - -### Option 2: Build in Xcode - -Note: This workflow requires a separate tool in addition to Bazel. If it fails -to work for some reason, please resort to the command-line build instructions in -the previous section. +This allows you to edit and debug one of the example apps in Xcode. It also +allows you to make use of automatic provisioning (see later section). 1. We will use a tool called [Tulsi](https://tulsi.bazel.build/) for generating Xcode projects from Bazel build configurations. @@ -283,25 +247,138 @@ the previous section. 2. Open `mediapipe/Mediapipe.tulsiproj` using the Tulsi app. - Important: If Tulsi displays an error saying "Bazel could not be found", - press the "Bazel..." button in the Packages tab and select the `bazel` - executable in your homebrew `/bin/` directory. + Tip: If Tulsi displays an error saying "Bazel could not be found", press the + "Bazel..." button in the Packages tab and select the `bazel` executable in + your homebrew `/bin/` directory. 3. Select the MediaPipe config in the Configs tab, then press the Generate button below. You will be asked for a location to save the Xcode project. Once the project is generated, it will be opened in Xcode. -4. 
You can now select any of the MediaPipe demos in the target menu, and build + If you get an error about bundle IDs, see the + [previous section](#set-up-a-bundle-id-prefix). + +### Set up provisioning + +To install applications on an iOS device, you need a provisioning profile. There +are two options: + +1. Automatic provisioning. This allows you to build and install an app to your + personal device. The provisining profile is managed by Xcode, and has to be + updated often (it is valid for about a week). + +2. Custom provisioning. This uses a provisioning profile associated with an + Apple developer account. These profiles have a longer validity period and + can target multiple devices, but you need a paid developer account with + Apple to obtain one. + +#### Automatic provisioning + +1. Create an Xcode project for MediaPipe, as discussed + [earlier](#create-an-xcode-project). + +2. In the project navigator in the left sidebar, select the "Mediapipe" + project. + +3. Select one of the application targets, e.g. HandTrackingGpuApp. + +4. Select the "Signing & Capabilities" tab. + +5. Check "Automatically manage signing", and confirm the dialog box. + +6. Select "_Your Name_ (Personal Team)" in the Team pop-up menu. + +7. This set-up needs to be done once for each application you want to install. + Repeat steps 3-6 as needed. + +This generates provisioning profiles for each app you have selected. Now we need +to tell Bazel to use them. We have provided a script to make this easier. + +1. In the terminal, to the `mediapipe` directory where you cloned the + repository. + +2. Run this command: + + ```bash + python3 mediapipe/examples/ios/link_local_profiles.py + ``` + +This will find and link the provisioning profile for all applications for which +you have enabled automatic provisioning in Xcode. + +Note: once a profile expires, Xcode will generate a new one; you must then run +this script again to link the updated profiles. + +#### Custom provisioning + +1. Obtain a provisioning profile from Apple. + +Tip: You can use this command to see the provisioning profiles you have +previously downloaded using Xcode: `open ~/Library/MobileDevice/"Provisioning +Profiles"`. If there are none, generate and download a profile on +[Apple's developer site](https://developer.apple.com/account/resources/). + +1. Symlink or copy your provisioning profile to + `mediapipe/mediapipe/provisioning_profile.mobileprovision`. + + ```bash + cd mediapipe + ln -s ~/Downloads/MyProvisioningProfile.mobileprovision mediapipe/provisioning_profile.mobileprovision + ``` + +Note: if you had previously set up automatic provisioning, you should remove the +`provisioning_profile.mobileprovision` symlink in each example's directory, +since it will take precedence over the common one. You can also overwrite it +with you own profile if you need a different profile for different apps. + +1. Open `mediapipe/examples/ios/bundle_id.bzl`, and change the + `BUNDLE_ID_PREFIX` to a prefix associated with your provisioning profile. + +### Build and run an app using Xcode + +1. Create the Xcode project, and make sure you have set up either automatic or + custom provisioning. + +2. You can now select any of the MediaPipe demos in the target menu, and build and run them as normal. - Note: When you ask Xcode to run an app, by default it will use the Debug - configuration. Some of our demos are computationally heavy; you may want to - use the Release configuration for better performance. 
+Note: When you ask Xcode to run an app, by default it will use the Debug +configuration. Some of our demos are computationally heavy; you may want to use +the Release configuration for better performance. - Tip: To switch build configuration in Xcode, click on the target menu, - choose "Edit Scheme...", select the Run action, and switch the Build - Configuration from Debug to Release. Note that this is set independently for - each target. +Tip: To switch build configuration in Xcode, click on the target menu, choose +"Edit Scheme...", select the Run action, and switch the Build Configuration from +Debug to Release. Note that this is set independently for each target. + +Tip: On the device, in Settings > General > Device Management, make sure the +developer (yourself) is trusted. + +### Build an app using the command line + +1. Make sure you have set up either automatic or custom provisioning. + +2. Using [MediaPipe Hands](../solutions/hands.md) for example, run: + + ```bash + bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp + ``` + + You may see a permission request from `codesign` in order to sign the app. + + Tip: If you are using custom provisioning, you can run this + [script](https://github.com/google/mediapipe/blob/master/build_ios_examples.sh) + to build all MediaPipe iOS example apps. + +3. In Xcode, open the `Devices and Simulators` window (command-shift-2). + +4. Make sure your device is connected. You will see a list of installed apps. + Press the "+" button under the list, and select the `.ipa` file built by + Bazel. + +5. You can now run the app on your device. + +Tip: On the device, in Settings > General > Device Management, make sure the +developer (yourself) is trusted. ## Desktop @@ -313,9 +390,6 @@ the previous section. bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu ``` - This will open up your webcam as long as it is connected and on. Any errors - is likely due to your webcam being not accessible. - 2. To run the application: ```bash @@ -323,6 +397,9 @@ the previous section. --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt ``` + This will open up your webcam as long as it is connected and on. Any errors + is likely due to your webcam being not accessible. + ### Option 2: Running on GPU Note: This currently works only on Linux, and please first follow @@ -335,13 +412,13 @@ Note: This currently works only on Linux, and please first follow mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu ``` - This will open up your webcam as long as it is connected and on. Any errors - is likely due to your webcam being not accessible, or GPU drivers not setup - properly. - 2. To run the application: ```bash GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt ``` + + This will open up your webcam as long as it is connected and on. Any errors + is likely due to your webcam being not accessible, or GPU drivers not setup + properly. 
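If the desktop examples above report that the webcam cannot be opened, it can save time to check the camera outside of MediaPipe first. The sketch below assumes a Debian/Ubuntu Linux host and the `v4l-utils` package; neither is required by MediaPipe, and `/dev/video0` is only the usual default device.

```bash
# List the video capture devices the kernel knows about.
ls -l /dev/video*

# Optional: install v4l-utils and query device names and supported formats.
sudo apt-get install -y v4l-utils
v4l2-ctl --list-devices
v4l2-ctl --device=/dev/video0 --list-formats-ext

# On many distributions the user must be in the "video" group to read the camera.
groups | grep -qw video && echo "user can access video devices" \
  || echo "consider: sudo usermod -aG video $USER (then log out and back in)"
```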
diff --git a/docs/getting_started/hello_world_android.md b/docs/getting_started/hello_world_android.md index 2794ea4f8..e4e8286f7 100644 --- a/docs/getting_started/hello_world_android.md +++ b/docs/getting_started/hello_world_android.md @@ -43,8 +43,8 @@ We will be using the following graph, [`edge_detection_mobile_gpu.pbtxt`]: ``` # MediaPipe graph that performs GPU Sobel edge detection on a live video stream. -# Used in the examples -# mediapipe/examples/android/src/java/com/mediapipe/apps/basic. +# Used in the examples in +# mediapipe/examples/android/src/java/com/mediapipe/apps/basic and # mediapipe/examples/ios/edgedetectiongpu. # Images coming into and out of the graph. @@ -764,7 +764,7 @@ If you ran into any issues, please see the full code of the tutorial [CameraX]:https://developer.android.com/training/camerax [`CameraXPreviewHelper`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/CameraXPreviewHelper.java [developer options]:https://developer.android.com/studio/debug/dev-options -[`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt +[`edge_detection_mobile_gpu.pbtxt`]:https://github.com/google/mediapipe/tree/master/mediapipe/graphs/edge_detection/edge_detection_mobile_gpu.pbtxt [`EglManager`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/glutil/EglManager.java [`ExternalTextureConverter`]:https://github.com/google/mediapipe/tree/master/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java [`FrameLayout`]:https://developer.android.com/reference/android/widget/FrameLayout diff --git a/docs/getting_started/hello_world_desktop.md b/docs/getting_started/hello_world_desktop.md index 28a9aea8f..61e9b6471 100644 --- a/docs/getting_started/hello_world_desktop.md +++ b/docs/getting_started/hello_world_desktop.md @@ -18,7 +18,7 @@ nav_order: 5 2. To run the [`hello world`] example: ```bash - $ git clone https://github.com/google/mediapipe/mediapipe.git + $ git clone https://github.com/google/mediapipe.git $ cd mediapipe $ export GLOG_logtostderr=1 @@ -92,10 +92,10 @@ nav_order: 5 ```c++ CalculatorGraph graph; - RETURN_IF_ERROR(graph.Initialize(config)); - ASSIGN_OR_RETURN(OutputStreamPoller poller, - graph.AddOutputStreamPoller("out")); - RETURN_IF_ERROR(graph.StartRun({})); + MP_RETURN_IF_ERROR(graph.Initialize(config)); + MP_ASSIGN_OR_RETURN(OutputStreamPoller poller, + graph.AddOutputStreamPoller("out")); + MP_RETURN_IF_ERROR(graph.StartRun({})); ``` 5. The example then creates 10 packets (each packet contains a string "Hello @@ -105,9 +105,10 @@ nav_order: 5 ```c++ for (int i = 0; i < 10; ++i) { - RETURN_IF_ERROR(graph.AddPacketToInputStream("in", MakePacket("Hello World!").At(Timestamp(i)))); + MP_RETURN_IF_ERROR(graph.AddPacketToInputStream("in", + MakePacket("Hello World!").At(Timestamp(i)))); } - RETURN_IF_ERROR(graph.CloseInputStream("in")); + MP_RETURN_IF_ERROR(graph.CloseInputStream("in")); ``` 6. Through the `OutputStreamPoller` object the example then retrieves all 10 diff --git a/docs/getting_started/hello_world_ios.md b/docs/getting_started/hello_world_ios.md index 1c6c44961..2fdb028ce 100644 --- a/docs/getting_started/hello_world_ios.md +++ b/docs/getting_started/hello_world_ios.md @@ -56,7 +56,7 @@ node: { output_stream: "luma_video" } -# Applies the Sobel filter to luminance images sotred in RGB format. 
+# Applies the Sobel filter to luminance images stored in RGB format. node: { calculator: "SobelEdgesCalculator" input_stream: "luma_video" diff --git a/docs/getting_started/install.md b/docs/getting_started/install.md index 7374e244b..b9be6e498 100644 --- a/docs/getting_started/install.md +++ b/docs/getting_started/install.md @@ -70,9 +70,15 @@ apps, see these [instructions](./building_examples.md#ios). libopencv-imgproc-dev libopencv-video-dev ``` - [`opencv_linux.BUILD`] is configured for x86_64 by default. For Nvidia - Jetson and Raspberry Pi devices with ARM Ubuntu, the lib paths need to be - modified. + Debian 9 and Ubuntu 18.04 install the packages in + `/usr/lib/x86_64-linux-gnu`. MediaPipe's [`opencv_linux.BUILD`] and + [`ffmpeg_linux.BUILD`] are configured for this library path. Ubuntu 20.04 + may install the OpenCV and FFmpeg packages in `/usr/local`, Please follow + the option 3 below to modify the [`WORKSPACE`], [`opencv_linux.BUILD`] and + [`ffmpeg_linux.BUILD`] files accordingly. + + Moreover, for Nvidia Jetson and Raspberry Pi devices with ARM Ubuntu, the + library path needs to be modified like the following: ```bash sed -i "s/x86_64-linux-gnu/aarch64-linux-gnu/g" third_party/opencv_linux.BUILD @@ -85,11 +91,13 @@ apps, see these [instructions](./building_examples.md#ios). [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) to manually build OpenCV from source code. - Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to - point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed - in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] - like the following: + Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and + [`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg + libraries. For example if OpenCV and FFmpeg are both manually installed in + "/usr/local/", you will need to update: (1) the "linux_opencv" and + "linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv" + cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg" + cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below: ```bash new_local_repository( @@ -98,6 +106,12 @@ apps, see these [instructions](./building_examples.md#ios). path = "/usr/local", ) + new_local_repository( + name = "linux_ffmpeg", + build_file = "@//third_party:ffmpeg_linux.BUILD", + path = "/usr/local", + ) + cc_library( name = "opencv", srcs = glob( @@ -110,8 +124,36 @@ apps, see these [instructions](./building_examples.md#ios). "lib/libopencv_videoio.so", ], ), - hdrs = glob(["include/opencv4/**/*.h*"]), - includes = ["include/opencv4/"], + hdrs = glob([ + # For OpenCV 3.x + "include/opencv2/**/*.h*", + # For OpenCV 4.x + # "include/opencv4/opencv2/**/*.h*", + ]), + includes = [ + # For OpenCV 3.x + "include/", + # For OpenCV 4.x + # "include/opencv4/", + ], + linkstatic = 1, + visibility = ["//visibility:public"], + ) + + cc_library( + name = "libffmpeg", + srcs = glob( + [ + "lib/libav*.so", + ], + ), + hdrs = glob(["include/libav*/*.h"]), + includes = ["include"], + linkopts = [ + "-lavcodec", + "-lavformat", + "-lavutil", + ], linkstatic = 1, visibility = ["//visibility:public"], ) @@ -158,6 +200,10 @@ apps, see these [instructions](./building_examples.md#ios). # Hello World! 
``` +If you run into a build error, please read +[Troubleshooting](./troubleshooting.md) to find the solutions of several common +build issues. + ## Installing on CentOS **Disclaimer**: Running MediaPipe on CentOS is experimental. @@ -190,11 +236,13 @@ apps, see these [instructions](./building_examples.md#ios). Option 2. Build OpenCV from source code. - Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to - point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed - in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] - like the following: + Note: You may need to modify [`WORKSPACE`], [`opencv_linux.BUILD`] and + [`ffmpeg_linux.BUILD`] to point MediaPipe to your own OpenCV and FFmpeg + libraries. For example if OpenCV and FFmpeg are both manually installed in + "/usr/local/", you will need to update: (1) the "linux_opencv" and + "linux_ffmpeg" new_local_repository rules in [`WORKSPACE`], (2) the "opencv" + cc_library rule in [`opencv_linux.BUILD`], and (3) the "libffmpeg" + cc_library rule in [`ffmpeg_linux.BUILD`]. These 3 changes are shown below: ```bash new_local_repository( @@ -203,6 +251,12 @@ apps, see these [instructions](./building_examples.md#ios). path = "/usr/local", ) + new_local_repository( + name = "linux_ffmpeg", + build_file = "@//third_party:ffmpeg_linux.BUILD", + path = "/usr/local", + ) + cc_library( name = "opencv", srcs = glob( @@ -215,8 +269,36 @@ apps, see these [instructions](./building_examples.md#ios). "lib/libopencv_videoio.so", ], ), - hdrs = glob(["include/opencv4/**/*.h*"]), - includes = ["include/opencv4/"], + hdrs = glob([ + # For OpenCV 3.x + "include/opencv2/**/*.h*", + # For OpenCV 4.x + # "include/opencv4/opencv2/**/*.h*", + ]), + includes = [ + # For OpenCV 3.x + "include/", + # For OpenCV 4.x + # "include/opencv4/", + ], + linkstatic = 1, + visibility = ["//visibility:public"], + ) + + cc_library( + name = "libffmpeg", + srcs = glob( + [ + "lib/libav*.so", + ], + ), + hdrs = glob(["include/libav*/*.h"]), + includes = ["include"], + linkopts = [ + "-lavcodec", + "-lavformat", + "-lavutil", + ], linkstatic = 1, visibility = ["//visibility:public"], ) @@ -243,6 +325,10 @@ apps, see these [instructions](./building_examples.md#ios). # Hello World! ``` +If you run into a build error, please read +[Troubleshooting](./troubleshooting.md) to find the solutions of several common +build issues. + ## Installing on macOS 1. Prework: @@ -375,6 +461,10 @@ apps, see these [instructions](./building_examples.md#ios). # Hello World! ``` +If you run into a build error, please read +[Troubleshooting](./troubleshooting.md) to find the solutions of several common +build issues. + ## Installing on Windows **Disclaimer**: Running MediaPipe on Windows is experimental. @@ -454,13 +544,13 @@ next section. 9. Run the [Hello World desktop example](./hello_world_desktop.md). Note: For building MediaPipe on Windows, please add `--action_env - PYTHON_BIN_PATH="C:/path/to/python.exe"` to the build command. + PYTHON_BIN_PATH="C://path//to//python.exe"` to the build command. Alternatively, you can follow [issue 724](https://github.com/google/mediapipe/issues/724) to fix the python configuration manually. 
``` - C:\Users\Username\mediapipe_repo>bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 --action_env PYTHON_BIN_PATH="C:/python_36/python.exe" mediapipe/examples/desktop/hello_world + C:\Users\Username\mediapipe_repo>bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 --action_env PYTHON_BIN_PATH="C://python_36//python.exe" mediapipe/examples/desktop/hello_world C:\Users\Username\mediapipe_repo>set GLOG_logtostderr=1 @@ -480,6 +570,10 @@ next section. ``` +If you run into a build error, please read +[Troubleshooting](./troubleshooting.md) to find the solutions of several common +build issues. + ## Installing on Windows Subsystem for Linux (WSL) Note: The pre-built OpenCV packages don't support cameras in WSL. Unless you @@ -603,6 +697,10 @@ cameras. Alternatively, you use a video file as input. # Hello World! ``` +If you run into a build error, please +read [Troubleshooting](./troubleshooting.md) to find the solutions of several +common build issues. + ## Installing using Docker This will use a Docker image that will isolate mediapipe's installation from the rest of the system. @@ -653,6 +751,10 @@ This will use a Docker image that will isolate mediapipe's installation from the # Hello World! ``` +If you run into a build error, please +read [Troubleshooting](./troubleshooting.md) to find the solutions of several +common build issues. + 4. Build a MediaPipe Android example. ```bash @@ -692,6 +794,7 @@ This will use a Docker image that will isolate mediapipe's installation from the [`WORKSPACE`]: https://github.com/google/mediapipe/blob/master/WORKSPACE [`opencv_linux.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_linux.BUILD +[`ffmpeg_linux.BUILD`]:https://github.com/google/mediapipe/tree/master/third_party/ffmpeg_linux.BUILD [`opencv_macos.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_macos.BUILD [`ffmpeg_macos.BUILD`]:https://github.com/google/mediapipe/tree/master/third_party/ffmpeg_macos.BUILD [`setup_opencv.sh`]: https://github.com/google/mediapipe/blob/master/setup_opencv.sh diff --git a/docs/getting_started/troubleshooting.md b/docs/getting_started/troubleshooting.md index 9d1bedac4..6d1a0e96e 100644 --- a/docs/getting_started/troubleshooting.md +++ b/docs/getting_started/troubleshooting.md @@ -12,6 +12,90 @@ nav_order: 10 {:toc} --- +## Missing Python binary path + +The error message: + +``` +ERROR: An error occurred during the fetch of repository 'local_execution_config_python': + Traceback (most recent call last): + File "/sandbox_path/external/org_tensorflow/third_party/py/python_configure.bzl", line 208 + get_python_bin(repository_ctx) + ... +Repository command failed +``` + +usually indicates that Bazel fails to find the local Python binary. To solve +this issue, please first find where the python binary is and then add +`--action_env PYTHON_BIN_PATH=` to the Bazel command like +the following: + +``` +bazel build -c opt \ + --define MEDIAPIPE_DISABLE_GPU=1 \ + --action_env PYTHON_BIN_PATH="/path/to/python" \ + mediapipe/examples/desktop/hello_world +``` + +## Missing necessary Python packages + +The error message: + +``` +ImportError: No module named numpy +Is numpy installed? +``` + +usually indicates that certain Python packages are not installed. Please run +`pip install` or `pip3 install` depending on your Python binary version to +install those packages. 
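As a concrete sketch of the fix described above (the package names are only examples; install whichever modules the error message lists):

```bash
# Install the missing packages for the Python 3 interpreter you intend to use.
pip3 install --user numpy six

# Verify the modules now import cleanly.
python3 -c "import numpy, six; print(numpy.__version__, six.__version__)"

# If Bazel still resolves a different interpreter, point it at the right one
# explicitly (same flag as in the previous section).
bazel build -c opt \
    --define MEDIAPIPE_DISABLE_GPU=1 \
    --action_env PYTHON_BIN_PATH=$(which python3) \
    mediapipe/examples/desktop/hello_world
```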
+
+## Fail to fetch remote dependency repositories
+
+The error message:
+
+```
+ERROR: An error occurred during the fetch of repository 'org_tensorflow':
+   java.io.IOException: Error downloading [https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/77e9ffb9b2bfb1a4f7056e62d84039626923e328.tar.gz, https://github.com/tensorflow/tensorflow/archive/77e9ffb9b2bfb1a4f7056e62d84039626923e328.tar.gz] to /sandbox_path/external/org_tensorflow/77e9ffb9b2bfb1a4f7056e62d84039626923e328.tar.gz: Tried to reconnect at offset 9,944,151 but server didn't support it
+
+or
+
+WARNING: Download from https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_swift/releases/download/0.12.1/rules_swift.0.12.1.tar.gz failed: class java.net.ConnectException Connection timed out (Connection timed out)
+```
+
+usually indicates that Bazel fails to download necessary dependency repositories
+that MediaPipe needs. MediaPipe has several dependency repositories that are
+hosted by Google sites. In some regions, you may need to set up a network proxy
+or use a VPN to access those resources. You may also need to append
+`--host_jvm_args "-DsocksProxyHost=<ip> -DsocksProxyPort=<port>"`
+to the Bazel command. See
+[this GitHub issue](https://github.com/google/mediapipe/issues/581#issuecomment-610356857)
+for more details.
+
+If you believe that it's not a network issue, another possibility is that some
+resources are temporarily unavailable. In that case, run `bazel clean --expunge`
+and retry later. If it's still not working, please file a GitHub issue with
+the detailed error message.
+
+## Incorrect MediaPipe OpenCV config
+
+The error message:
+
+```
+error: undefined reference to 'cv::String::deallocate()'
+error: undefined reference to 'cv::String::allocate(unsigned long)'
+error: undefined reference to 'cv::VideoCapture::VideoCapture(cv::String const&)'
+...
+error: undefined reference to 'cv::putText(cv::InputOutputArray const&, cv::String const&, cv::Point, int, double, cv::Scalar, int, int, bool)'
+```
+
+usually indicates that OpenCV is not properly configured for MediaPipe. Please
+take a look at the "Install OpenCV and FFmpeg" sections in
+[Installation](./install.md) to see how to modify MediaPipe's WORKSPACE and
+linux_opencv/macos_opencv/windows_opencv.BUILD files for your local OpenCV
+libraries. [This GitHub issue](https://github.com/google/mediapipe/issues/666)
+may also help.
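Before editing the `WORKSPACE` and `opencv_linux.BUILD`/`ffmpeg_linux.BUILD` files as described in [Installation](./install.md), it helps to confirm where OpenCV is actually installed so the `path`, `srcs`, and `includes` entries match your system. A sketch for Linux follows; the paths shown are common defaults, not guaranteed.

```bash
# Locate the OpenCV shared libraries registered with the dynamic linker.
ldconfig -p | grep libopencv_core

# Check which include layout is present (OpenCV 3.x and 4.x differ).
ls -d /usr/include/opencv2 /usr/include/opencv4/opencv2 \
      /usr/local/include/opencv2 /usr/local/include/opencv4/opencv2 2>/dev/null

# If pkg-config metadata is installed, it reports compiler and linker flags directly.
pkg-config --cflags --libs opencv4 2>/dev/null || pkg-config --cflags --libs opencv
```

The installation prefix reported here is what goes into the `path` attribute of the `linux_opencv` `new_local_repository` rule in `WORKSPACE`.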
+ ## Native method not found The error message: diff --git a/docs/images/mobile/iris_tracking_android_gpu.gif b/docs/images/mobile/iris_tracking_android_gpu.gif new file mode 100644 index 000000000..6214d9e5c Binary files /dev/null and b/docs/images/mobile/iris_tracking_android_gpu.gif differ diff --git a/docs/images/mobile/iris_tracking_android_gpu_small.gif b/docs/images/mobile/iris_tracking_android_gpu_small.gif new file mode 100644 index 000000000..050355476 Binary files /dev/null and b/docs/images/mobile/iris_tracking_android_gpu_small.gif differ diff --git a/docs/images/mobile/iris_tracking_depth_from_iris.gif b/docs/images/mobile/iris_tracking_depth_from_iris.gif new file mode 100644 index 000000000..2bcc80ea2 Binary files /dev/null and b/docs/images/mobile/iris_tracking_depth_from_iris.gif differ diff --git a/docs/images/mobile/iris_tracking_example.gif b/docs/images/mobile/iris_tracking_example.gif new file mode 100644 index 000000000..7988f3e95 Binary files /dev/null and b/docs/images/mobile/iris_tracking_example.gif differ diff --git a/docs/images/mobile/iris_tracking_eye_and_iris_landmarks.png b/docs/images/mobile/iris_tracking_eye_and_iris_landmarks.png new file mode 100644 index 000000000..1afb56395 Binary files /dev/null and b/docs/images/mobile/iris_tracking_eye_and_iris_landmarks.png differ diff --git a/docs/index.md b/docs/index.md index 39ea05b42..bd27df416 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,13 +22,13 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Hands | Hair Segmentation -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :---------------: -[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) +Face Detection | Face Mesh | Iris | Hands +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---: +[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) -Object Detection | Box Tracking | Objectron | KNIFT -:----------------------------------------------------------------------------------------------------------------------------------: | 
:-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT +:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: +[![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) @@ -37,6 +37,7 @@ Object Detection :---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | ✅ | [Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | ✅ | [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | ✅ @@ -63,6 +64,8 @@ never leaves your device. 
![visualizer_runner](images/visualizer_runner.png) * [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection) +* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking) +* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth) * [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking) * [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection) * [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation) @@ -83,6 +86,8 @@ run code search using ## Publications +* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation from a Single + Image](https://mediapipe.page.link/iris-blog) in Google AI Blog * [MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html) in Google Developers Blog * [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html) @@ -123,7 +128,7 @@ run code search using * [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome MediaPipe related frameworks, libraries and software -* [Slack community](https://https://mediapipe.page.link/joinslack) for MediaPipe users +* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users * [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe diff --git a/docs/solutions/autoflip.md b/docs/solutions/autoflip.md index f78b4ae95..faad99e92 100644 --- a/docs/solutions/autoflip.md +++ b/docs/solutions/autoflip.md @@ -2,7 +2,7 @@ layout: default title: AutoFlip (Saliency-aware Video Cropping) parent: Solutions -nav_order: 9 +nav_order: 10 --- # AutoFlip: Saliency-aware Video Cropping diff --git a/docs/solutions/box_tracking.md b/docs/solutions/box_tracking.md index 84da8565d..5c73a97fb 100644 --- a/docs/solutions/box_tracking.md +++ b/docs/solutions/box_tracking.md @@ -2,7 +2,7 @@ layout: default title: Box Tracking parent: Solutions -nav_order: 6 +nav_order: 7 --- # MediaPipe Box Tracking @@ -112,7 +112,7 @@ examples. Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ### Mobile diff --git a/docs/solutions/face_detection.md b/docs/solutions/face_detection.md index a8e844df4..4b9534b22 100644 --- a/docs/solutions/face_detection.md +++ b/docs/solutions/face_detection.md @@ -43,7 +43,7 @@ examples. Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ### Mobile diff --git a/docs/solutions/face_mesh.md b/docs/solutions/face_mesh.md index e81ac0f08..c678901a7 100644 --- a/docs/solutions/face_mesh.md +++ b/docs/solutions/face_mesh.md @@ -65,7 +65,7 @@ from the Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). 
## Models @@ -109,7 +109,7 @@ Please first see general instructions for Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ### Mobile @@ -153,8 +153,8 @@ it, in the graph file modify the option of `ConstantSidePacketCalculator`. [Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs](https://arxiv.org/abs/1907.06724) ([poster](https://docs.google.com/presentation/d/1-LWwOMO9TzEVdrZ1CS1ndJzciRHfYDJfbSxH_ke_JRg/present?slide=id.g5986dd4b4c_4_212)) * Face detection model: - [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite) -* Face landmark mode: - [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_landmark.tflite), + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite) +* Face landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite), [TF.js model](https://tfhub.dev/mediapipe/facemesh/1) * [Model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view) diff --git a/docs/solutions/hair_segmentation.md b/docs/solutions/hair_segmentation.md index 87361040a..0dec46951 100644 --- a/docs/solutions/hair_segmentation.md +++ b/docs/solutions/hair_segmentation.md @@ -2,7 +2,7 @@ layout: default title: Hair Segmentation parent: Solutions -nav_order: 4 +nav_order: 5 --- # MediaPipe Hair Segmentation @@ -24,7 +24,7 @@ examples. Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ### Mobile diff --git a/docs/solutions/hands.md b/docs/solutions/hands.md index 4ba33f861..8edfd5850 100644 --- a/docs/solutions/hands.md +++ b/docs/solutions/hands.md @@ -2,7 +2,7 @@ layout: default title: Hands parent: Solutions -nav_order: 3 +nav_order: 4 --- # MediaPipe Hands @@ -66,7 +66,7 @@ and a Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ## Models @@ -132,7 +132,7 @@ examples. Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ### Mobile diff --git a/docs/solutions/iris.md b/docs/solutions/iris.md new file mode 100644 index 000000000..d95b804ca --- /dev/null +++ b/docs/solutions/iris.md @@ -0,0 +1,204 @@ +--- +layout: default +title: Iris +parent: Solutions +nav_order: 3 +--- + +# MediaPipe Iris +{: .no_toc } + +1. TOC +{:toc} +--- + +## Overview + +A wide range of real-world applications, including computational photography +(glint reflection) and augmented reality effects (virtual avatars) rely on +accurately tracking the iris within an eye. 
This is a challenging task to solve
+on mobile devices, due to the limited computing resources, variable light
+conditions and the presence of occlusions, such as hair or people squinting.
+Iris tracking can also be utilized to determine the metric distance of the
+camera to the user. This can improve a variety of use cases, ranging from
+virtual try-on of properly sized glasses and hats to accessibility features that
+adapt the font size depending on the viewer’s distance. Often, sophisticated
+specialized hardware is employed to compute the metric distance, limiting the
+range of devices on which the solution could be applied.
+
+MediaPipe Iris is an ML solution for accurate iris estimation, able to track
+landmarks involving the iris, pupil and the eye contours using a single RGB
+camera, in real-time, without the need for specialized hardware. Through use of
+iris landmarks, the solution is also able to determine the metric distance
+between the subject and the camera with relative error less than 10%. Note that
+iris tracking does not infer the location at which people are looking, nor does
+it provide any form of identity recognition. With the cross-platform capability
+of the MediaPipe framework, MediaPipe Iris can run on most modern
+[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
+[web](#web).
+
+![iris_tracking_example.gif](../images/mobile/iris_tracking_example.gif) |
+:------------------------------------------------------------------------: |
+*Fig 1. Example of MediaPipe Iris: eyelid (red) and iris (blue) contours.* |
+
+## ML Pipeline
+
+The first step in the pipeline leverages [MediaPipe Face Mesh](./face_mesh.md),
+which generates a mesh of the approximate face geometry. From this mesh, we
+isolate the eye region in the original image for use in the subsequent iris
+tracking step.
+
+The pipeline is implemented as a MediaPipe
+[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt)
+that uses a
+[face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
+from the
+[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark),
+an
+[iris landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_tracking/iris_landmark_left_and_right_gpu.pbtxt)
+from the
+[iris landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark),
+and renders using a dedicated
+[iris-and-depth renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_gpu.pbtxt).
+The
+[face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
+internally uses a
+[face detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)
+from the
+[face detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection).
+
+Note: To visualize a graph, copy the graph and paste it into
+[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
+to visualize its associated subgraphs, please see
+[visualizer documentation](../tools/visualizer.md).
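The Depth-from-Iris feature described later on this page rests on a simple pinhole-camera argument: the horizontal iris diameter of the human eye is roughly constant at about 11.7 mm, so its apparent size in the image reveals how far away it is. As a rough sketch of that geometry (a back-of-the-envelope illustration, not necessarily the exact formula used by the solution), with the focal length `f` and the observed iris diameter `d` both measured in pixels:

```latex
$$ \mathrm{depth} \;\approx\; \frac{f \cdot D}{d},
   \qquad D \approx 11.7\ \mathrm{mm}\ \text{(physical iris diameter)} $$
```

Halving the apparent iris size in the image therefore corresponds to roughly doubling the estimated distance to the camera.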
+ +## Models + +### Face Detection Model + +The face detector is the same [BlazeFace](https://arxiv.org/abs/1907.05047) +model used in [MediaPipe Face Detection](./face_detection.md). + +### Face Landmark Model + +The face landmark model is the same as in [MediaPipe Face Mesh](./face_mesh.md). +You can also find more details in this +[paper](https://arxiv.org/abs/1907.06724). + +### Iris Landmark Model + +The iris model takes an image patch of the eye region and estimates both the eye +landmarks (along the eyelid) and iris landmarks (along the iris contour). You +can find more details in this [paper](https://arxiv.org/abs/2006.11341). + +![iris_tracking_eye_and_iris_landmarks.png](../images/mobile/iris_tracking_eye_and_iris_landmarks.png) | +:----------------------------------------------------------------------------------------------------: | +*Fig 2. Eye landmarks (red) and iris landmarks (green).* | + +## Depth-from-Iris + +MediaPipe Iris is able to determine the metric distance of a subject to the +camera with less than 10% error, without requiring any specialized hardware. +This is done by relying on the fact that the horizontal iris diameter of the +human eye remains roughly constant at 11.7±0.5 mm across a wide population, +along with some simple geometric arguments. For more details please refer to our +[Google AI Blog post](https://mediapipe.page.link/iris-blog). + +![iris_tracking_depth_from_iris.gif](../images/mobile/iris_tracking_depth_from_iris.gif) | +:--------------------------------------------------------------------------------------------: | +*Fig 3. (Left) MediaPipe Iris predicting metric distance in cm on a Pixel 2 from iris tracking without use of a depth sensor. (Right) Ground-truth depth.* | + +## Example Apps + +Please first see general instructions for +[Android](../getting_started/building_examples.md#android), +[iOS](../getting_started/building_examples.md#ios) and +[desktop](../getting_started/building_examples.md#desktop) on how to build +MediaPipe examples. + +Note: To visualize a graph, copy the graph and paste it into +[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how +to visualize its associated subgraphs, please see +[visualizer documentation](../tools/visualizer.md). + +### Mobile + +* Graph: + [`mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt) +* Android target: + [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1cywcNtqk764TlZf1lvSTV4F3NGB2aL1R/view?usp=sharing) + [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu:iristrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD) +* iOS target: + [`mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/iristrackinggpu/BUILD) + +### Desktop + +#### Live Camera Input + +Please first see general instructions for +[desktop](../getting_started/building_examples.md#desktop) on how to build +MediaPipe examples.
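As an aside before the CPU/GPU build targets listed below: the geometric argument behind the Depth-from-Iris section above fits in a few lines. The sketch assumes a simple pinhole-camera model and a focal length already expressed in pixels; the function and constant names are invented here for illustration and are not part of the MediaPipe code.

```c++
#include <cmath>

// Pinhole-camera sketch of depth-from-iris (illustrative only). If the
// physical horizontal iris diameter is ~11.7 mm and the iris spans
// `iris_diameter_px` pixels in an image captured with a focal length of
// `focal_length_px` pixels, similar triangles give the camera-to-eye distance.
double EstimateDepthMm(double focal_length_px, double iris_diameter_px) {
  constexpr double kIrisDiameterMm = 11.7;  // roughly constant across people
  return focal_length_px * kIrisDiameterMm / iris_diameter_px;
}

// Example: with a 1000 px focal length and an iris spanning 39 px,
// EstimateDepthMm(1000.0, 39.0) == 300.0 mm, i.e. about 30 cm from the camera.
```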
+ +* Running on CPU + * Graph: + [`mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt) + * Target: + [`mediapipe/examples/desktop/iris_tracking:iris_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/iris_tracking/BUILD) +* Running on GPU + * Graph: + [`mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt) + * Target: + [`mediapipe/examples/desktop/iris_tracking:iris_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/iris_tracking/BUILD) + +#### Video File Input + +1. To build the application, run: + + ```bash + bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/iris_tracking:iris_tracking_cpu_video_input + ``` + +2. To run the application, replace `` and `` in the command below with your own paths: + + ``` + bazel-bin/mediapipe/examples/desktop/iris_tracking/iris_tracking_cpu_video_input \ + --calculator_graph_config_file=mediapipe/graphs/iris_tracking/iris_tracking_cpu_video_input.pbtxt \ + --input_side_packets=input_video_path=,output_video_path= + ``` + +#### Single-image Depth Estimation + +1. To build the application, run: + + ```bash + bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/iris_tracking:iris_depth_from_image_desktop + ``` + +2. To run the application, replace `` and `` in the command below with your own paths: + + ```bash + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop \ + --input_image_path= --output_image_path= + ``` + +### Web + +Please refer to [these instructions](../index.md#mediapipe-on-the-web). + +## Resources + +* Google AI Blog: [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation + from a Single Image](https://mediapipe.page.link/iris-blog) +* Paper: + [Real-time Pupil Tracking from Monocular Video for Digital Puppetry](https://arxiv.org/abs/2006.11341) + ([presentation](https://youtu.be/cIhXkiiapQI)) +* Face detection model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite) +* Face landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite), + [TF.js model](https://tfhub.dev/mediapipe/facemesh/1) +* Iris landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark.tflite) +* [Model card](https://mediapipe.page.link/iris-mc) diff --git a/docs/solutions/knift.md b/docs/solutions/knift.md index ec2eec154..942ad255f 100644 --- a/docs/solutions/knift.md +++ b/docs/solutions/knift.md @@ -2,7 +2,7 @@ layout: default title: KNIFT (Template-based Feature Matching) parent: Solutions -nav_order: 8 +nav_order: 9 --- # MediaPipe KNIFT @@ -72,7 +72,7 @@ Please first see general instructions for Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). 
* Graph: [`mediapipe/graphs/template_matching/template_matching_mobile_cpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/template_matching/template_matching_mobile_cpu.pbtxt) diff --git a/docs/solutions/media_sequence.md b/docs/solutions/media_sequence.md index bee6d8951..dc3ef63bc 100644 --- a/docs/solutions/media_sequence.md +++ b/docs/solutions/media_sequence.md @@ -2,7 +2,7 @@ layout: default title: Dataset Preparation with MediaSequence parent: Solutions -nav_order: 10 +nav_order: 11 --- # Dataset Preparation with MediaSequence diff --git a/docs/solutions/object_detection.md b/docs/solutions/object_detection.md index a92e57e7d..340e1990a 100644 --- a/docs/solutions/object_detection.md +++ b/docs/solutions/object_detection.md @@ -2,7 +2,7 @@ layout: default title: Object Detection parent: Solutions -nav_order: 5 +nav_order: 6 --- # MediaPipe Object Detection @@ -19,7 +19,7 @@ nav_order: 5 Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). ### Mobile @@ -95,8 +95,8 @@ Please first see general instructions for ``` GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \ - --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt \ - --input_side_packets=input_video_path=,output_video_path= + --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt \ + --input_side_packets=input_video_path=,output_video_path= ``` * With a TensorFlow Model @@ -131,8 +131,8 @@ Please first see general instructions for ```bash GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \ - --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tensorflow_graph.pbtxt \ - --input_side_packets=input_video_path=,output_video_path= + --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tensorflow_graph.pbtxt \ + --input_side_packets=input_video_path=,output_video_path= ``` ### Coral diff --git a/docs/solutions/objectron.md b/docs/solutions/objectron.md index c142bfdf9..0239f174c 100644 --- a/docs/solutions/objectron.md +++ b/docs/solutions/objectron.md @@ -2,7 +2,7 @@ layout: default title: Objectron (3D Object Detection) parent: Solutions -nav_order: 7 +nav_order: 8 --- # MediaPipe Objectron @@ -156,7 +156,7 @@ Please first see general instructions for Note: To visualize a graph, copy the graph and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how to visualize its associated subgraphs, please see -[visualizer documentation](../visualizer.md). +[visualizer documentation](../tools/visualizer.md). 
### Objectron for Shoes diff --git a/docs/solutions/solutions.md b/docs/solutions/solutions.md index 73331526a..840b5ce3d 100644 --- a/docs/solutions/solutions.md +++ b/docs/solutions/solutions.md @@ -14,12 +14,13 @@ has_toc: false --- - + []() | Android | iOS | Desktop | Web | Coral :---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | ✅ | [Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | ✅ | [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | ✅ diff --git a/docs/solutions/youtube_8m.md b/docs/solutions/youtube_8m.md index 5179e3aa5..ebb51dcc4 100644 --- a/docs/solutions/youtube_8m.md +++ b/docs/solutions/youtube_8m.md @@ -2,7 +2,7 @@ layout: default title: YouTube-8M Feature Extraction and Model Inference parent: Solutions -nav_order: 11 +nav_order: 12 --- # YouTube-8M Feature Extraction and Model Inference diff --git a/docs/tools/tracing_and_profiling.md b/docs/tools/tracing_and_profiling.md index 472e52a7d..a0188836b 100644 --- a/docs/tools/tracing_and_profiling.md +++ b/docs/tools/tracing_and_profiling.md @@ -33,13 +33,12 @@ command line option: `--define MEDIAPIPE_PROFILING=0`. To enable tracing and profiling, the `CalculatorGraphConfig` (in [calculator.proto](https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator.proto)) representing the graph must have a `profiler_config` message at its root. 
Here -is a simple setup that turns on a few extra options: +is a simple setup that turns on tracing and keeps 100 seconds of timing events: ``` profiler_config { - enable_profiler: true trace_enabled: true - trace_log_count: 5 + trace_log_interval_count: 200 } ``` diff --git a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen index 4830f5b16..b8e8f95bf 100644 --- a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen +++ b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen @@ -10,6 +10,7 @@ "mediapipe/examples/ios/facemeshgpu/BUILD", "mediapipe/examples/ios/handdetectiongpu/BUILD", "mediapipe/examples/ios/handtrackinggpu/BUILD", + "mediapipe/examples/ios/iristrackinggpu/BUILD", "mediapipe/examples/ios/multihandtrackinggpu/BUILD", "mediapipe/examples/ios/objectdetectioncpu/BUILD", "mediapipe/examples/ios/objectdetectiongpu/BUILD" @@ -21,6 +22,7 @@ "//mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp", "//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp", "//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp", + "//mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp", "//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp", "//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp", "//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp", @@ -88,6 +90,8 @@ "mediapipe/examples/ios/handdetectiongpu/Base.lproj", "mediapipe/examples/ios/handtrackinggpu", "mediapipe/examples/ios/handtrackinggpu/Base.lproj", + "mediapipe/examples/ios/iristrackinggpu", + "mediapipe/examples/ios/iristrackinggpu/Base.lproj", "mediapipe/examples/ios/multihandtrackinggpu", "mediapipe/examples/ios/multihandtrackinggpu/Base.lproj", "mediapipe/examples/ios/objectdetectioncpu", @@ -110,6 +114,7 @@ "mediapipe/graphs/hand_tracking", "mediapipe/graphs/object_detection", "mediapipe/models", + "mediapipe/modules", "mediapipe/objc", "mediapipe/util", "mediapipe/util/android", diff --git a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf index c2c54aeeb..9f1ab5d66 100644 --- a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf +++ b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf @@ -17,6 +17,7 @@ "mediapipe/examples/ios/facemeshgpu", "mediapipe/examples/ios/handdetectiongpu", "mediapipe/examples/ios/handtrackinggpu", + "mediapipe/examples/ios/iristrackinggpu", "mediapipe/examples/ios/multihandtrackinggpu", "mediapipe/examples/ios/objectdetectioncpu", "mediapipe/examples/ios/objectdetectiongpu" diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 6196bed5b..a0b22054f 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -316,6 +316,37 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "concatenate_normalized_landmark_list_calculator", + srcs = ["concatenate_normalized_landmark_list_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":concatenate_vector_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "concatenate_normalized_landmark_list_calculator_test", + srcs = ["concatenate_normalized_landmark_list_calculator_test.cc"], + deps = [ + ":concatenate_normalized_landmark_list_calculator", + ":concatenate_vector_calculator_cc_proto", + 
"//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/strings", + ], +) + cc_test( name = "concatenate_vector_calculator_test", srcs = ["concatenate_vector_calculator_test.cc"], @@ -450,6 +481,21 @@ cc_library( alwayslink = 1, ) +cc_test( + name = "mux_calculator_test", + srcs = ["mux_calculator_test.cc"], + deps = [ + ":mux_calculator", + ":round_robin_demux_calculator", + ":split_vector_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + ], +) + cc_library( name = "packet_cloner_calculator", srcs = ["packet_cloner_calculator.cc"], @@ -947,7 +993,6 @@ cc_test( "//mediapipe/framework:calculator_runner", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:parse_text_proto", - "//mediapipe/framework/port:status", ], ) diff --git a/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc new file mode 100644 index 000000000..54c3e05b9 --- /dev/null +++ b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc @@ -0,0 +1,84 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT +#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT + +#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Concatenates several NormalizedLandmarkList protos following stream index +// order. This class assumes that every input stream contains a +// NormalizedLandmarkList proto object. 
+class ConcatenateNormalizedLandmarkListCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().NumEntries() != 0); + RET_CHECK(cc->Outputs().NumEntries() == 1); + + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + cc->Inputs().Index(i).Set(); + } + + cc->Outputs().Index(0).Set(); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + only_emit_if_all_present_ = + cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>() + .only_emit_if_all_present(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (only_emit_if_all_present_) { + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + if (cc->Inputs().Index(i).IsEmpty()) return ::mediapipe::OkStatus(); + } + } + + NormalizedLandmarkList output; + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + if (cc->Inputs().Index(i).IsEmpty()) continue; + const NormalizedLandmarkList& input = + cc->Inputs().Index(i).Get(); + for (int j = 0; j < input.landmark_size(); ++j) { + const NormalizedLandmark& input_landmark = input.landmark(j); + *output.add_landmark() = input_landmark; + } + } + cc->Outputs().Index(0).AddPacket( + MakePacket(output).At(cc->InputTimestamp())); + return ::mediapipe::OkStatus(); + } + + private: + bool only_emit_if_all_present_; +}; + +REGISTER_CALCULATOR(ConcatenateNormalizedLandmarkListCalculator); + +} // namespace mediapipe + +// NOLINTNEXTLINE +#endif // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ diff --git a/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator_test.cc b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator_test.cc new file mode 100644 index 000000000..fd116ece7 --- /dev/null +++ b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator_test.cc @@ -0,0 +1,184 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { + +constexpr float kLocationValue = 3; + +NormalizedLandmarkList GenerateLandmarks(int landmarks_size, + int value_multiplier) { + NormalizedLandmarkList landmarks; + for (int i = 0; i < landmarks_size; ++i) { + NormalizedLandmark* landmark = landmarks.add_landmark(); + landmark->set_x(value_multiplier * kLocationValue); + landmark->set_y(value_multiplier * kLocationValue); + landmark->set_z(value_multiplier * kLocationValue); + } + return landmarks; +} + +void ValidateCombinedLandmarks( + const std::vector& inputs, + const NormalizedLandmarkList& result) { + int element_id = 0; + int expected_size = 0; + for (int i = 0; i < inputs.size(); ++i) { + const NormalizedLandmarkList& landmarks_i = inputs[i]; + expected_size += landmarks_i.landmark_size(); + for (int j = 0; j < landmarks_i.landmark_size(); ++j) { + const NormalizedLandmark& expected = landmarks_i.landmark(j); + const NormalizedLandmark& got = result.landmark(element_id); + EXPECT_FLOAT_EQ(expected.x(), got.x()); + EXPECT_FLOAT_EQ(expected.y(), got.y()); + EXPECT_FLOAT_EQ(expected.z(), got.z()); + ++element_id; + } + } + EXPECT_EQ(expected_size, result.landmark_size()); +} + +void AddInputLandmarkLists( + const std::vector& input_landmarks_vec, + int64 timestamp, CalculatorRunner* runner) { + for (int i = 0; i < input_landmarks_vec.size(); ++i) { + runner->MutableInputs()->Index(i).packets.push_back( + MakePacket(input_landmarks_vec[i]) + .At(Timestamp(timestamp))); + } +} + +TEST(ConcatenateNormalizedLandmarkListCalculatorTest, EmptyVectorInputs) { + CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator", + /*options_string=*/"", /*num_inputs=*/3, + /*num_outputs=*/1, /*num_side_packets=*/0); + + NormalizedLandmarkList empty_list; + std::vector inputs = {empty_list, empty_list, + empty_list}; + AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(0, outputs[0].Get().landmark_size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); +} + +TEST(ConcatenateNormalizedLandmarkListCalculatorTest, OneTimestamp) { + CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator", + /*options_string=*/"", /*num_inputs=*/3, + /*num_outputs=*/1, /*num_side_packets=*/0); + + NormalizedLandmarkList input_0 = + GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0); + NormalizedLandmarkList input_1 = + GenerateLandmarks(/*landmarks_size=*/1, /*value_multiplier=*/1); + NormalizedLandmarkList input_2 = + GenerateLandmarks(/*landmarks_size=*/2, /*value_multiplier=*/2); + std::vector inputs = {input_0, input_1, input_2}; + AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + const NormalizedLandmarkList& result = + outputs[0].Get(); + ValidateCombinedLandmarks(inputs, result); +} + +TEST(ConcatenateNormalizedLandmarkListCalculatorTest, + TwoInputsAtTwoTimestamps) { + 
CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator", + /*options_string=*/"", /*num_inputs=*/3, + /*num_outputs=*/1, /*num_side_packets=*/0); + + NormalizedLandmarkList input_0 = + GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0); + NormalizedLandmarkList input_1 = + GenerateLandmarks(/*landmarks_size=*/1, /*value_multiplier=*/1); + NormalizedLandmarkList input_2 = + GenerateLandmarks(/*landmarks_size=*/2, /*value_multiplier=*/2); + std::vector inputs = {input_0, input_1, input_2}; + { AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner); } + { AddInputLandmarkLists(inputs, /*timestamp=*/2, &runner); } + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(2, outputs.size()); + { + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + const NormalizedLandmarkList& result = + outputs[0].Get(); + ValidateCombinedLandmarks(inputs, result); + } + { + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + const NormalizedLandmarkList& result = + outputs[1].Get(); + ValidateCombinedLandmarks(inputs, result); + } +} + +TEST(ConcatenateNormalizedLandmarkListCalculatorTest, + OneEmptyStreamStillOutput) { + CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator", + /*options_string=*/"", /*num_inputs=*/2, + /*num_outputs=*/1, /*num_side_packets=*/0); + + NormalizedLandmarkList input_0 = + GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0); + std::vector inputs = {input_0}; + AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + const NormalizedLandmarkList& result = + outputs[0].Get(); + ValidateCombinedLandmarks(inputs, result); +} + +TEST(ConcatenateNormalizedLandmarkListCalculatorTest, OneEmptyStreamNoOutput) { + CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator", + /*options_string=*/ + "[mediapipe.ConcatenateVectorCalculatorOptions.ext]: " + "{only_emit_if_all_present: true}", + /*num_inputs=*/2, + /*num_outputs=*/1, /*num_side_packets=*/0); + + NormalizedLandmarkList input_0 = + GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0); + std::vector inputs = {input_0}; + AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(0, outputs.size()); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/gate_calculator.cc b/mediapipe/calculators/core/gate_calculator.cc index aedd01b64..ea0f7b81b 100644 --- a/mediapipe/calculators/core/gate_calculator.cc +++ b/mediapipe/calculators/core/gate_calculator.cc @@ -56,12 +56,19 @@ std::string ToString(GateState state) { // disallowing the corresponding packets in other input streams. The behavior // can be inverted with a calculator option. // +// ALLOW or DISALLOW can also be specified as an input side packet. The rules +// for evaluation remain the same as above. +// +// ALLOW/DISALLOW inputs must be specified either using input stream or +// via input side packet but not both. +// // Intended to be used with the default input stream handler, which synchronizes // all data input streams with the ALLOW/DISALLOW control input stream. 
// // Example config: // node { // calculator: "GateCalculator" +// input_side_packet: "ALLOW:allow" or "DISALLOW:disallow" // input_stream: "input_stream0" // input_stream: "input_stream1" // input_stream: "input_streamN" @@ -75,10 +82,40 @@ class GateCalculator : public CalculatorBase { public: GateCalculator() {} + static ::mediapipe::Status CheckAndInitAllowDisallowInputs( + CalculatorContract* cc) { + bool input_via_side_packet = cc->InputSidePackets().HasTag("ALLOW") || + cc->InputSidePackets().HasTag("DISALLOW"); + bool input_via_stream = + cc->Inputs().HasTag("ALLOW") || cc->Inputs().HasTag("DISALLOW"); + // Only one of input_side_packet or input_stream may specify ALLOW/DISALLOW + // input. + RET_CHECK(input_via_side_packet ^ input_via_stream); + + if (input_via_side_packet) { + RET_CHECK(cc->InputSidePackets().HasTag("ALLOW") ^ + cc->InputSidePackets().HasTag("DISALLOW")); + + if (cc->InputSidePackets().HasTag("ALLOW")) { + cc->InputSidePackets().Tag("ALLOW").Set(); + } else { + cc->InputSidePackets().Tag("DISALLOW").Set(); + } + } else { + RET_CHECK(cc->Inputs().HasTag("ALLOW") ^ cc->Inputs().HasTag("DISALLOW")); + + if (cc->Inputs().HasTag("ALLOW")) { + cc->Inputs().Tag("ALLOW").Set(); + } else { + cc->Inputs().Tag("DISALLOW").Set(); + } + } + return ::mediapipe::OkStatus(); + } + static ::mediapipe::Status GetContract(CalculatorContract* cc) { - // Assume that input streams do not have a tag and that gating signal is - // tagged either ALLOW or DISALLOW. - RET_CHECK(cc->Inputs().HasTag("ALLOW") ^ cc->Inputs().HasTag("DISALLOW")); + RET_CHECK_OK(CheckAndInitAllowDisallowInputs(cc)); + const int num_data_streams = cc->Inputs().NumEntries(""); RET_CHECK_GE(num_data_streams, 1); RET_CHECK_EQ(cc->Outputs().NumEntries(""), num_data_streams) @@ -88,11 +125,6 @@ class GateCalculator : public CalculatorBase { cc->Inputs().Get("", i).SetAny(); cc->Outputs().Get("", i).SetSameAs(&cc->Inputs().Get("", i)); } - if (cc->Inputs().HasTag("ALLOW")) { - cc->Inputs().Tag("ALLOW").Set(); - } else { - cc->Inputs().Tag("DISALLOW").Set(); - } if (cc->Outputs().HasTag("STATE_CHANGE")) { cc->Outputs().Tag("STATE_CHANGE").Set(); @@ -102,6 +134,17 @@ class GateCalculator : public CalculatorBase { } ::mediapipe::Status Open(CalculatorContext* cc) final { + use_side_packet_for_allow_disallow_ = false; + if (cc->InputSidePackets().HasTag("ALLOW")) { + use_side_packet_for_allow_disallow_ = true; + allow_by_side_packet_decision_ = + cc->InputSidePackets().Tag("ALLOW").Get(); + } else if (cc->InputSidePackets().HasTag("DISALLOW")) { + use_side_packet_for_allow_disallow_ = true; + allow_by_side_packet_decision_ = + !cc->InputSidePackets().Tag("DISALLOW").Get(); + } + cc->SetOffset(TimestampDiff(0)); num_data_streams_ = cc->Inputs().NumEntries(""); last_gate_state_ = GATE_UNINITIALIZED; @@ -115,14 +158,18 @@ class GateCalculator : public CalculatorBase { ::mediapipe::Status Process(CalculatorContext* cc) final { bool allow = empty_packets_as_allow_; - if (cc->Inputs().HasTag("ALLOW") && !cc->Inputs().Tag("ALLOW").IsEmpty()) { - allow = cc->Inputs().Tag("ALLOW").Get(); + if (use_side_packet_for_allow_disallow_) { + allow = allow_by_side_packet_decision_; + } else { + if (cc->Inputs().HasTag("ALLOW") && + !cc->Inputs().Tag("ALLOW").IsEmpty()) { + allow = cc->Inputs().Tag("ALLOW").Get(); + } + if (cc->Inputs().HasTag("DISALLOW") && + !cc->Inputs().Tag("DISALLOW").IsEmpty()) { + allow = !cc->Inputs().Tag("DISALLOW").Get(); + } } - if (cc->Inputs().HasTag("DISALLOW") && - !cc->Inputs().Tag("DISALLOW").IsEmpty()) { - 
allow = !cc->Inputs().Tag("DISALLOW").Get(); - } - const GateState new_gate_state = allow ? GATE_ALLOW : GATE_DISALLOW; if (cc->Outputs().HasTag("STATE_CHANGE")) { @@ -157,6 +204,8 @@ class GateCalculator : public CalculatorBase { GateState last_gate_state_ = GATE_UNINITIALIZED; int num_data_streams_; bool empty_packets_as_allow_; + bool use_side_packet_for_allow_disallow_; + bool allow_by_side_packet_decision_; }; REGISTER_CALCULATOR(GateCalculator); diff --git a/mediapipe/calculators/core/gate_calculator_test.cc b/mediapipe/calculators/core/gate_calculator_test.cc index 8a7272416..fc34f6e97 100644 --- a/mediapipe/calculators/core/gate_calculator_test.cc +++ b/mediapipe/calculators/core/gate_calculator_test.cc @@ -24,6 +24,21 @@ namespace { class GateCalculatorTest : public ::testing::Test { protected: + // Helper to run a graph and return status. + static ::mediapipe::Status RunGraph(const std::string& proto) { + auto runner = absl::make_unique( + ParseTextProtoOrDie(proto)); + return runner->Run(); + } + + // Use this when ALLOW/DISALLOW input is provided as a side packet. + void RunTimeStep(int64 timestamp, bool stream_payload) { + runner_->MutableInputs()->Get("", 0).packets.push_back( + MakePacket(stream_payload).At(Timestamp(timestamp))); + MP_ASSERT_OK(runner_->Run()) << "Calculator execution failed."; + } + + // Use this when ALLOW/DISALLOW input is provided as an input stream. void RunTimeStep(int64 timestamp, const std::string& control_tag, bool control) { runner_->MutableInputs()->Get("", 0).packets.push_back( @@ -31,7 +46,6 @@ class GateCalculatorTest : public ::testing::Test { runner_->MutableInputs() ->Tag(control_tag) .packets.push_back(MakePacket(control).At(Timestamp(timestamp))); - MP_ASSERT_OK(runner_->Run()) << "Calculator execution failed."; } @@ -46,6 +60,136 @@ class GateCalculatorTest : public ::testing::Test { std::unique_ptr runner_; }; +TEST_F(GateCalculatorTest, InvalidInputs) { + EXPECT_TRUE(absl::IsInternal(GateCalculatorTest::RunGraph(R"( + calculator: "GateCalculator" + input_stream: "test_input" + input_stream: "ALLOW:gating_stream" + input_stream: "DISALLOW:gating_stream" + output_stream: "test_output" + )"))); + + EXPECT_TRUE(absl::IsInternal(GateCalculatorTest::RunGraph(R"( + calculator: "GateCalculator" + input_stream: "test_input" + input_side_packet: "ALLOW:gating_stream" + input_side_packet: "DISALLOW:gating_stream" + output_stream: "test_output" + )"))); + + EXPECT_TRUE(absl::IsInternal(GateCalculatorTest::RunGraph(R"( + calculator: "GateCalculator" + input_stream: "test_input" + input_stream: "ALLOW:gating_stream" + input_side_packet: "ALLOW:gating_stream" + output_stream: "test_output" + )"))); + + EXPECT_TRUE(absl::IsInternal(GateCalculatorTest::RunGraph(R"( + calculator: "GateCalculator" + input_stream: "test_input" + input_stream: "DISALLOW:gating_stream" + input_side_packet: "DISALLOW:gating_stream" + output_stream: "test_output" + )"))); + + EXPECT_TRUE(absl::IsInternal(GateCalculatorTest::RunGraph(R"( + calculator: "GateCalculator" + input_stream: "test_input" + input_stream: "ALLOW:gating_stream" + input_side_packet: "DISALLOW:gating_stream" + output_stream: "test_output" + )"))); + + EXPECT_TRUE(absl::IsInternal(GateCalculatorTest::RunGraph(R"( + calculator: "GateCalculator" + input_stream: "test_input" + input_stream: "DISALLOW:gating_stream" + input_side_packet: "ALLOW:gating_stream" + output_stream: "test_output" + )"))); +} + +TEST_F(GateCalculatorTest, AllowByALLOWSidePacketSetToTrue) { + SetRunner(R"( + calculator: "GateCalculator" 
+ input_side_packet: "ALLOW:gating_stream" + input_stream: "test_input" + output_stream: "test_output" + )"); + runner()->MutableSidePackets()->Tag("ALLOW") = Adopt(new bool(true)); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(2, output.size()); + EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value()); + EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value()); + EXPECT_EQ(true, output[0].Get()); + EXPECT_EQ(false, output[1].Get()); +} + +TEST_F(GateCalculatorTest, AllowByDisallowSidePacketSetToFalse) { + SetRunner(R"( + calculator: "GateCalculator" + input_side_packet: "DISALLOW:gating_stream" + input_stream: "test_input" + output_stream: "test_output" + )"); + runner()->MutableSidePackets()->Tag("DISALLOW") = Adopt(new bool(false)); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(2, output.size()); + EXPECT_EQ(kTimestampValue0, output[0].Timestamp().Value()); + EXPECT_EQ(kTimestampValue1, output[1].Timestamp().Value()); + EXPECT_EQ(true, output[0].Get()); + EXPECT_EQ(false, output[1].Get()); +} + +TEST_F(GateCalculatorTest, DisallowByALLOWSidePacketSetToFalse) { + SetRunner(R"( + calculator: "GateCalculator" + input_side_packet: "ALLOW:gating_stream" + input_stream: "test_input" + output_stream: "test_output" + )"); + runner()->MutableSidePackets()->Tag("ALLOW") = Adopt(new bool(false)); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(0, output.size()); +} + +TEST_F(GateCalculatorTest, DisallowByDISALLOWSidePacketSetToTrue) { + SetRunner(R"( + calculator: "GateCalculator" + input_side_packet: "DISALLOW:gating_stream" + input_stream: "test_input" + output_stream: "test_output" + )"); + runner()->MutableSidePackets()->Tag("DISALLOW") = Adopt(new bool(true)); + + constexpr int64 kTimestampValue0 = 42; + RunTimeStep(kTimestampValue0, true); + constexpr int64 kTimestampValue1 = 43; + RunTimeStep(kTimestampValue1, false); + + const std::vector& output = runner()->Outputs().Get("", 0).packets; + ASSERT_EQ(0, output.size()); +} + TEST_F(GateCalculatorTest, Allow) { SetRunner(R"( calculator: "GateCalculator" diff --git a/mediapipe/calculators/core/immediate_mux_calculator.cc b/mediapipe/calculators/core/immediate_mux_calculator.cc index cb930bed7..007fbf73e 100644 --- a/mediapipe/calculators/core/immediate_mux_calculator.cc +++ b/mediapipe/calculators/core/immediate_mux_calculator.cc @@ -37,6 +37,10 @@ namespace mediapipe { // the RoundRobinDemuxCalculator. Therefore, packets from different // input streams are normally not expected to have the same timestamp. // +// NOTE: this calculator can drop packets non-deterministically, depending on +// how fast the input streams are fed. In most cases, MuxCalculator should be +// preferred. In particular, dropping packets can interfere with rate limiting +// mechanisms. 
class ImmediateMuxCalculator : public CalculatorBase { public: // This calculator combines any set of input streams into a single @@ -76,6 +80,9 @@ REGISTER_CALCULATOR(ImmediateMuxCalculator); if (!packet.IsEmpty()) { if (packet.Timestamp() >= cc->Outputs().Index(0).NextTimestampBound()) { cc->Outputs().Index(0).AddPacket(packet); + } else { + LOG_FIRST_N(WARNING, 5) + << "Dropping a packet with timestamp " << packet.Timestamp(); } if (cc->Outputs().NumEntries() >= 2) { Timestamp output_timestamp = std::max( diff --git a/mediapipe/calculators/core/mux_calculator.cc b/mediapipe/calculators/core/mux_calculator.cc index 1d1ae1904..8ca25bdd0 100644 --- a/mediapipe/calculators/core/mux_calculator.cc +++ b/mediapipe/calculators/core/mux_calculator.cc @@ -17,28 +17,49 @@ namespace mediapipe { +namespace { +constexpr char kSelectTag[] = "SELECT"; +constexpr char kInputTag[] = "INPUT"; +} // namespace + // A Calculator that selects an input stream from "INPUT:0", "INPUT:1", ..., -// using the integer value (0, 1, ...) in the packet on the "SELECT" input +// using the integer value (0, 1, ...) in the packet on the kSelectTag input // stream, and passes the packet on the selected input stream to the "OUTPUT" // output stream. +// The kSelectTag input can also be passed in as an input side packet, instead +// of as an input stream. Either of input stream or input side packet must be +// specified but not both. // // Note that this calculator defaults to use MuxInputStreamHandler, which is -// required for this calculator. +// required for this calculator. However, it can be overridden to work with +// other InputStreamHandlers. Check out the unit tests on for an example usage +// with DefaultInputStreamHandler. class MuxCalculator : public CalculatorBase { public: + static ::mediapipe::Status CheckAndInitAllowDisallowInputs( + CalculatorContract* cc) { + RET_CHECK(cc->Inputs().HasTag(kSelectTag) ^ + cc->InputSidePackets().HasTag(kSelectTag)); + if (cc->Inputs().HasTag(kSelectTag)) { + cc->Inputs().Tag(kSelectTag).Set(); + } else { + cc->InputSidePackets().Tag(kSelectTag).Set(); + } + return ::mediapipe::OkStatus(); + } + static ::mediapipe::Status GetContract(CalculatorContract* cc) { - cc->Inputs().Tag("SELECT").Set(); - CollectionItemId data_input_id = cc->Inputs().BeginId("INPUT"); + RET_CHECK_OK(CheckAndInitAllowDisallowInputs(cc)); + CollectionItemId data_input_id = cc->Inputs().BeginId(kInputTag); PacketType* data_input0 = &cc->Inputs().Get(data_input_id); data_input0->SetAny(); ++data_input_id; - for (; data_input_id < cc->Inputs().EndId("INPUT"); ++data_input_id) { + for (; data_input_id < cc->Inputs().EndId(kInputTag); ++data_input_id) { cc->Inputs().Get(data_input_id).SetSameAs(data_input0); } RET_CHECK_EQ(cc->Outputs().NumEntries(), 1); cc->Outputs().Tag("OUTPUT").SetSameAs(data_input0); - // Assign this calculator's default InputStreamHandler. 
cc->SetInputStreamHandler("MuxInputStreamHandler"); MediaPipeOptions options; cc->SetInputStreamHandlerOptions(options); @@ -47,16 +68,24 @@ class MuxCalculator : public CalculatorBase { } ::mediapipe::Status Open(CalculatorContext* cc) final { - select_input_ = cc->Inputs().GetId("SELECT", 0); - data_input_base_ = cc->Inputs().GetId("INPUT", 0); - num_data_inputs_ = cc->Inputs().NumEntries("INPUT"); + use_side_packet_select_ = false; + if (cc->InputSidePackets().HasTag(kSelectTag)) { + use_side_packet_select_ = true; + selected_index_ = cc->InputSidePackets().Tag(kSelectTag).Get(); + } else { + select_input_ = cc->Inputs().GetId(kSelectTag, 0); + } + data_input_base_ = cc->Inputs().GetId(kInputTag, 0); + num_data_inputs_ = cc->Inputs().NumEntries(kInputTag); output_ = cc->Outputs().GetId("OUTPUT", 0); cc->SetOffset(TimestampDiff(0)); return ::mediapipe::OkStatus(); } ::mediapipe::Status Process(CalculatorContext* cc) final { - int select = cc->Inputs().Get(select_input_).Get(); + int select = use_side_packet_select_ + ? selected_index_ + : cc->Inputs().Get(select_input_).Get(); RET_CHECK(0 <= select && select < num_data_inputs_); if (!cc->Inputs().Get(data_input_base_ + select).IsEmpty()) { cc->Outputs().Get(output_).AddPacket( @@ -70,6 +99,8 @@ class MuxCalculator : public CalculatorBase { CollectionItemId data_input_base_; int num_data_inputs_ = 0; CollectionItemId output_; + bool use_side_packet_select_; + int selected_index_; }; REGISTER_CALCULATOR(MuxCalculator); diff --git a/mediapipe/calculators/core/mux_calculator_test.cc b/mediapipe/calculators/core/mux_calculator_test.cc new file mode 100644 index 000000000..ac6f7d6ee --- /dev/null +++ b/mediapipe/calculators/core/mux_calculator_test.cc @@ -0,0 +1,237 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/core/split_vector_calculator.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_matchers.h" + +namespace mediapipe { + +typedef SplitVectorCalculator SplitIntVectorCalculator; +REGISTER_CALCULATOR(SplitIntVectorCalculator); + +namespace { + +// Graph with default input stream handler, and the input selection is driven +// by an input stream. All MuxCalculator inputs are present at each timestamp. 
+constexpr char kTestGraphConfig1[] = R"proto( + input_stream: "input" + output_stream: "test_output" + node { + calculator: "SplitIntVectorCalculator" + input_stream: "input" + output_stream: "stream0" + output_stream: "stream1" + output_stream: "stream2" + output_stream: "input_select" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 3 end: 4 } + element_only: true + } + } + } + node { + calculator: "MuxCalculator" + input_stream: "INPUT:0:stream0" + input_stream: "INPUT:1:stream1" + input_stream: "INPUT:2:stream2" + input_stream: "SELECT:input_select" + output_stream: "OUTPUT:test_output" + input_stream_handler { input_stream_handler: "DefaultInputStreamHandler" } + } +)proto"; + +// Graph with default input stream handler, and the input selection is driven +// by an input side packet. All MuxCalculator inputs are present at each +// timestamp. +constexpr char kTestGraphConfig2[] = R"proto( + input_side_packet: "input_selector" + input_stream: "input" + output_stream: "test_output" + node { + calculator: "SplitIntVectorCalculator" + input_stream: "input" + output_stream: "stream0" + output_stream: "stream1" + output_stream: "stream2" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + ranges: { begin: 2 end: 3 } + element_only: true + } + } + } + node { + calculator: "MuxCalculator" + input_stream: "INPUT:0:stream0" + input_stream: "INPUT:1:stream1" + input_stream: "INPUT:2:stream2" + input_side_packet: "SELECT:input_selector" + output_stream: "OUTPUT:test_output" + input_stream_handler { input_stream_handler: "DefaultInputStreamHandler" } + } +)proto"; + +// Graph with mux input stream handler, and the input selection is driven +// by an input stream. Only one MuxCalculator input is present at each +// timestamp. +constexpr char kTestGraphConfig3[] = R"proto( + input_stream: "input" + output_stream: "test_output" + node { + calculator: "RoundRobinDemuxCalculator" + input_stream: "input" + output_stream: "OUTPUT:0:stream0" + output_stream: "OUTPUT:1:stream1" + output_stream: "OUTPUT:2:stream2" + output_stream: "SELECT:input_select" + } + node { + calculator: "MuxCalculator" + input_stream: "INPUT:0:stream0" + input_stream: "INPUT:1:stream1" + input_stream: "INPUT:2:stream2" + input_stream: "SELECT:input_select" + output_stream: "OUTPUT:test_output" + } +)proto"; + +constexpr char kOutputName[] = "test_output"; +constexpr char kInputName[] = "input"; +constexpr char kInputSelector[] = "input_selector"; + +// Helper to run a graph with the given inputs and generate outputs, asserting +// each step along the way. 
+// Inputs: +// graph_config_proto - graph config protobuf +// extra_side_packets - input side packets name to value map +// input_stream_name - name of the input +void RunGraph(const std::string& graph_config_proto, + const std::map& extra_side_packets, + const std::string& input_stream_name, int num_input_packets, + std::function input_fn, + const std::string& output_stream_name, + std::function<::mediapipe::Status(const Packet&)> output_fn) { + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie( + graph_config_proto); + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream(output_stream_name, output_fn)); + MP_ASSERT_OK(graph.StartRun(extra_side_packets)); + for (int i = 0; i < num_input_packets; ++i) { + MP_ASSERT_OK(graph.AddPacketToInputStream(input_stream_name, input_fn(i))); + } + MP_ASSERT_OK(graph.CloseAllInputStreams()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST(MuxCalculatorTest, InputStreamSelector_DefaultInputStreamHandler) { + // Input and handling. + std::vector> input_packets = { + {1, 1, 2, 1}, {3, 5, 8, 2}, {13, 21, 34, 0}, + {55, 89, 144, 2}, {233, 377, 610, 0}, {987, 1597, 2584, 1}, + {4181, 6765, 10946, 2}, + }; + int packet_time_stamp = 22; + // This function will return the i-th input packet. + auto input_fn = [&packet_time_stamp, &input_packets](int i) -> Packet { + return MakePacket>(input_packets[i]) + .At(Timestamp(packet_time_stamp++)); + }; + + // Output and handling. + std::vector output; + // This function collects the output from the packet. + auto output_fn = [&output](const Packet& p) -> ::mediapipe::Status { + output.push_back(p.Get()); + return ::mediapipe::OkStatus(); + }; + + RunGraph(kTestGraphConfig1, {}, kInputName, input_packets.size(), input_fn, + kOutputName, output_fn); + EXPECT_THAT(output, testing::ElementsAre(1, 8, 13, 144, 233, 1597, 10946)); +} + +TEST(MuxCalculatorTest, InputSidePacketSelector_DefaultInputStreamHandler) { + // Input and handling. + std::vector> input_packets = { + {1, 1, 2}, {3, 5, 8}, {13, 21, 34}, {55, 89, 144}, + {233, 377, 610}, {987, 1597, 2584}, {4181, 6765, 10946}, + }; + int packet_time_stamp = 22; + // This function will return the i-th input packet. + auto input_fn = [&packet_time_stamp, &input_packets](int i) -> Packet { + return MakePacket>(input_packets[i]) + .At(Timestamp(packet_time_stamp++)); + }; + + // Output and handling. + std::vector output; + // This function collects the output from the packet. + auto output_fn = [&output](const Packet& p) -> ::mediapipe::Status { + output.push_back(p.Get()); + return ::mediapipe::OkStatus(); + }; + + RunGraph(kTestGraphConfig2, {{kInputSelector, MakePacket(0)}}, + kInputName, input_packets.size(), input_fn, kOutputName, output_fn); + EXPECT_THAT(output, testing::ElementsAre(1, 3, 13, 55, 233, 987, 4181)); + + output.clear(); + RunGraph(kTestGraphConfig2, {{kInputSelector, MakePacket(1)}}, + kInputName, input_packets.size(), input_fn, kOutputName, output_fn); + EXPECT_THAT(output, testing::ElementsAre(1, 5, 21, 89, 377, 1597, 6765)); + + output.clear(); + RunGraph(kTestGraphConfig2, {{kInputSelector, MakePacket(2)}}, + kInputName, input_packets.size(), input_fn, kOutputName, output_fn); + EXPECT_THAT(output, testing::ElementsAre(2, 8, 34, 144, 610, 2584, 10946)); +} + +TEST(MuxCalculatorTest, InputStreamSelector_MuxInputStreamHandler) { + // Input and handling. 
+ std::vector input_packets = {1, 1, 2, 3, 5, 8, 13, + 21, 34, 55, 89, 144, 233, 377, + 610, 987, 1597, 2584, 4181, 6765, 10946}; + int packet_time_stamp = 22; + // This function will return the i-th input packet. + auto input_fn = [&packet_time_stamp, &input_packets](int i) -> Packet { + return MakePacket(input_packets[i]).At(Timestamp(packet_time_stamp++)); + }; + + // Output and handling. + std::vector output; + // This function collects the output from the packet. + auto output_fn = [&output](const Packet& p) -> ::mediapipe::Status { + output.push_back(p.Get()); + return ::mediapipe::OkStatus(); + }; + + RunGraph(kTestGraphConfig3, {}, kInputName, input_packets.size(), input_fn, + kOutputName, output_fn); + EXPECT_EQ(output, input_packets); +} +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc index 9d14ec956..8cbf04410 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator.cc @@ -128,11 +128,17 @@ class PreviousLoopbackCalculator : public CalculatorBase { loop_packets_.pop_front(); main_packet_specs_.pop_front(); } + + // We can close PREV_LOOP output stream as soon as we processed last + // possible MAIN packet. That can happen in two cases: + // a) Non-empty MAIN packet has been received with Timestamp::Max() + // b) Empty MAIN packet has been received with Timestamp::Max() indicating + // MAIN is done. + if (main_spec.timestamp == Timestamp::Done().PreviousAllowedInStream()) { + prev_loop.Close(); + } } - if (main_packet_specs_.empty() && cc->Inputs().Get(main_id_).IsDone()) { - prev_loop.Close(); - } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/core/previous_loopback_calculator_test.cc b/mediapipe/calculators/core/previous_loopback_calculator_test.cc index 0fabacd57..ef469b43a 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator_test.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator_test.cc @@ -228,6 +228,104 @@ TEST(PreviousLoopbackCalculator, ClosesCorrectly) { MP_EXPECT_OK(graph_.WaitUntilDone()); } +TEST(PreviousLoopbackCalculator, ProcessesMaxTimestamp) { + std::vector out_and_previous_packets; + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: 'in' + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:in' + input_stream: 'LOOP:out' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:previous' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + input_stream: 'previous' + output_stream: 'out' + output_stream: 'previous2' + } + node { + calculator: 'MakePairCalculator' + input_stream: 'out' + input_stream: 'previous' + output_stream: 'out_and_previous' + } + )"); + tool::AddVectorSink("out_and_previous", &graph_config, + &out_and_previous_packets); + + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config, {})); + MP_ASSERT_OK(graph.StartRun({})); + + MP_EXPECT_OK(graph.AddPacketToInputStream( + "in", MakePacket(1).At(Timestamp::Max()))); + + MP_EXPECT_OK(graph.WaitUntilIdle()); + + EXPECT_THAT(out_and_previous_packets, + ElementsAre(PairPacket(Timestamp::Max(), + Pair(IntPacket(1), EmptyPacket())))); + + MP_EXPECT_OK(graph.CloseAllInputStreams()); + MP_EXPECT_OK(graph.WaitUntilIdle()); + MP_EXPECT_OK(graph.WaitUntilDone()); +} + +TEST(PreviousLoopbackCalculator, ProcessesMaxTimestampNonEmptyPrevious) { + 
std::vector out_and_previous_packets; + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: 'in' + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:in' + input_stream: 'LOOP:out' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:previous' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + input_stream: 'previous' + output_stream: 'out' + output_stream: 'previous2' + } + node { + calculator: 'MakePairCalculator' + input_stream: 'out' + input_stream: 'previous' + output_stream: 'out_and_previous' + } + )"); + tool::AddVectorSink("out_and_previous", &graph_config, + &out_and_previous_packets); + + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config, {})); + MP_ASSERT_OK(graph.StartRun({})); + + MP_EXPECT_OK(graph.AddPacketToInputStream( + "in", MakePacket(1).At(Timestamp::Min()))); + MP_EXPECT_OK(graph.AddPacketToInputStream( + "in", MakePacket(2).At(Timestamp::Max()))); + + MP_EXPECT_OK(graph.WaitUntilIdle()); + + EXPECT_THAT( + out_and_previous_packets, + ElementsAre( + PairPacket(Timestamp::Min(), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp::Max(), Pair(IntPacket(2), IntPacket(1))))); + + MP_EXPECT_OK(graph.CloseAllInputStreams()); + MP_EXPECT_OK(graph.WaitUntilIdle()); + MP_EXPECT_OK(graph.WaitUntilDone()); +} + // Demonstrates that downstream calculators won't be blocked by // always-empty-LOOP-stream. TEST(PreviousLoopbackCalculator, EmptyLoopForever) { diff --git a/mediapipe/calculators/core/side_packet_to_stream_calculator.cc b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc index d7df7530b..47c3f624b 100644 --- a/mediapipe/calculators/core/side_packet_to_stream_calculator.cc +++ b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc @@ -34,6 +34,8 @@ constexpr char kTagAtPostStream[] = "AT_POSTSTREAM"; constexpr char kTagAtZero[] = "AT_ZERO"; constexpr char kTagAtTick[] = "AT_TICK"; constexpr char kTagTick[] = "TICK"; +constexpr char kTagAtTimestamp[] = "AT_TIMESTAMP"; +constexpr char kTagSideInputTimestamp[] = "TIMESTAMP"; static std::map* kTimestampMap = []() { auto* res = new std::map(); @@ -41,6 +43,7 @@ static std::map* kTimestampMap = []() { res->emplace(kTagAtPostStream, Timestamp::PostStream()); res->emplace(kTagAtZero, Timestamp(0)); res->emplace(kTagAtTick, Timestamp::Unset()); + res->emplace(kTagAtTimestamp, Timestamp::Unset()); return res; }(); @@ -56,9 +59,10 @@ std::string GetOutputTag(const CC& cc) { // timestamp, depending on the tag used to define output stream(s). (One tag can // be used only.) // -// Valid tags are AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO and AT_TICK and -// corresponding timestamps are Timestamp::PreStream(), Timestamp::PostStream(), -// Timestamp(0) and timestamp of a packet received in TICK input. +// Valid tags are AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, AT_TIMESTAMP +// and corresponding timestamps are Timestamp::PreStream(), +// Timestamp::PostStream(), Timestamp(0), timestamp of a packet received in TICK +// input, and timestamp received from a side input. 
// // Examples: // node { @@ -73,6 +77,13 @@ std::string GetOutputTag(const CC& cc) { // input_side_packet: "side_packet" // output_stream: "AT_TICK:packet" // } +// +// node { +// calculator: "SidePacketToStreamCalculator" +// input_side_packet: "TIMESTAMP:timestamp" +// input_side_packet: "side_packet" +// output_stream: "AT_TIMESTAMP:packet" +// } class SidePacketToStreamCalculator : public CalculatorBase { public: SidePacketToStreamCalculator() = default; @@ -93,16 +104,29 @@ REGISTER_CALCULATOR(SidePacketToStreamCalculator); CalculatorContract* cc) { const auto& tags = cc->Outputs().GetTags(); RET_CHECK(tags.size() == 1 && kTimestampMap->count(*tags.begin()) == 1) - << "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO and AT_TICK tags is " - "allowed and required to specify output stream(s)."; + << "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and " + "AT_TIMESTAMP tags is allowed and required to specify output " + "stream(s)."; RET_CHECK( (cc->Outputs().HasTag(kTagAtTick) && cc->Inputs().HasTag(kTagTick)) || (!cc->Outputs().HasTag(kTagAtTick) && !cc->Inputs().HasTag(kTagTick))) << "Either both of TICK and AT_TICK should be used or none of them."; + RET_CHECK((cc->Outputs().HasTag(kTagAtTimestamp) && + cc->InputSidePackets().HasTag(kTagSideInputTimestamp)) || + (!cc->Outputs().HasTag(kTagAtTimestamp) && + !cc->InputSidePackets().HasTag(kTagSideInputTimestamp))) + << "Either both TIMESTAMP and AT_TIMESTAMP should be used or none of " + "them."; const std::string output_tag = GetOutputTag(*cc); const int num_entries = cc->Outputs().NumEntries(output_tag); - RET_CHECK_EQ(num_entries, cc->InputSidePackets().NumEntries()) - << "Same number of input side packets and output streams is required."; + if (cc->Outputs().HasTag(kTagAtTimestamp)) { + RET_CHECK_EQ(num_entries + 1, cc->InputSidePackets().NumEntries()) + << "For AT_TIMESTAMP tag, 2 input side packets are required."; + cc->InputSidePackets().Tag(kTagSideInputTimestamp).Set(); + } else { + RET_CHECK_EQ(num_entries, cc->InputSidePackets().NumEntries()) + << "Same number of input side packets and output streams is required."; + } for (int i = 0; i < num_entries; ++i) { cc->InputSidePackets().Index(i).SetAny(); cc->Outputs() @@ -147,13 +171,22 @@ REGISTER_CALCULATOR(SidePacketToStreamCalculator); } ::mediapipe::Status SidePacketToStreamCalculator::Close(CalculatorContext* cc) { - if (!cc->Outputs().HasTag(kTagAtTick)) { + if (!cc->Outputs().HasTag(kTagAtTick) && + !cc->Outputs().HasTag(kTagAtTimestamp)) { const auto& timestamp = kTimestampMap->at(output_tag_); for (int i = 0; i < cc->Outputs().NumEntries(output_tag_); ++i) { cc->Outputs() .Get(output_tag_, i) .AddPacket(cc->InputSidePackets().Index(i).At(timestamp)); } + } else if (cc->Outputs().HasTag(kTagAtTimestamp)) { + int64 timestamp = + cc->InputSidePackets().Tag(kTagSideInputTimestamp).Get(); + for (int i = 0; i < cc->Outputs().NumEntries(output_tag_); ++i) { + cc->Outputs() + .Get(output_tag_, i) + .AddPacket(cc->InputSidePackets().Index(i).At(Timestamp(timestamp))); + } } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc b/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc index 078055f07..e7195e03b 100644 --- a/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc +++ b/mediapipe/calculators/core/side_packet_to_stream_calculator_test.cc @@ -51,6 +51,27 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MissingTick) { "Either both of TICK and AT_TICK should be used or none 
of them."); } +TEST(SidePacketToStreamCalculator, WrongConfig_MissingTimestampSideInput) { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie( + R"( + input_stream: "timestamp" + input_side_packet: "side_packet" + output_stream: "packet" + node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "side_packet" + output_stream: "AT_TIMESTAMP:packet" + } + )"); + CalculatorGraph graph; + auto status = graph.Initialize(graph_config); + EXPECT_FALSE(status.ok()); + EXPECT_PRED2( + absl::StrContains, status.message(), + "Either both TIMESTAMP and AT_TIMESTAMP should be used or none of them."); +} + TEST(SidePacketToStreamCalculator, WrongConfig_NonExistentTag) { CalculatorGraphConfig graph_config = ParseTextProtoOrDie( @@ -68,8 +89,9 @@ TEST(SidePacketToStreamCalculator, WrongConfig_NonExistentTag) { auto status = graph.Initialize(graph_config); EXPECT_FALSE(status.ok()); EXPECT_PRED2(absl::StrContains, status.message(), - "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO and AT_TICK " - "tags is allowed and required to specify output stream(s)."); + "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and " + "AT_TIMESTAMP tags is allowed and required to specify output " + "stream(s)."); } TEST(SidePacketToStreamCalculator, WrongConfig_MixedTags) { @@ -91,8 +113,9 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MixedTags) { auto status = graph.Initialize(graph_config); EXPECT_FALSE(status.ok()); EXPECT_PRED2(absl::StrContains, status.message(), - "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO and AT_TICK " - "tags is allowed and required to specify output stream(s)."); + "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and " + "AT_TIMESTAMP tags is allowed and required to specify output " + "stream(s)."); } TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughSidePackets) { @@ -271,5 +294,79 @@ TEST(SidePacketToStreamCalculator, AtTick_MultipleSidePackets) { tick_and_verify(/*at_timestamp=*/1025); } +TEST(SidePacketToStreamCalculator, AtTimestamp) { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie( + R"( + input_side_packet: "timestamp" + input_side_packet: "side_packet" + output_stream: "packet" + node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "TIMESTAMP:timestamp" + input_side_packet: "side_packet" + output_stream: "AT_TIMESTAMP:packet" + } + )"); + std::vector output_packets; + tool::AddVectorSink("packet", &graph_config, &output_packets); + CalculatorGraph graph; + + MP_ASSERT_OK(graph.Initialize(graph_config)); + const int expected_value = 20; + const int64 expected_timestamp = 5; + MP_ASSERT_OK( + graph.StartRun({{"side_packet", MakePacket(expected_value)}, + {"timestamp", MakePacket(expected_timestamp)}})); + + MP_ASSERT_OK(graph.WaitUntilDone()); + + ASSERT_FALSE(output_packets.empty()); + EXPECT_EQ(Timestamp(expected_timestamp), output_packets.back().Timestamp()); + EXPECT_EQ(expected_value, output_packets.back().Get()); +} + +TEST(SidePacketToStreamCalculator, AtTimestamp_MultipleOutputs) { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie( + R"( + input_side_packet: "timestamp" + input_side_packet: "side_packet0" + input_side_packet: "side_packet1" + output_stream: "packet" + node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "TIMESTAMP:timestamp" + input_side_packet: "side_packet0" + input_side_packet: "side_packet1" + output_stream: "AT_TIMESTAMP:0:packet0" + output_stream: "AT_TIMESTAMP:1:packet1" + } + )"); + std::vector output_packets0; + 
tool::AddVectorSink("packet0", &graph_config, &output_packets0); + std::vector output_packets1; + tool::AddVectorSink("packet1", &graph_config, &output_packets1); + CalculatorGraph graph; + + MP_ASSERT_OK(graph.Initialize(graph_config)); + const int expected_value0 = 20; + const int expected_value1 = 15; + const int64 expected_timestamp = 5; + MP_ASSERT_OK( + graph.StartRun({{"side_packet0", MakePacket(expected_value0)}, + {"side_packet1", MakePacket(expected_value1)}, + {"timestamp", MakePacket(expected_timestamp)}})); + + MP_ASSERT_OK(graph.WaitUntilDone()); + + ASSERT_FALSE(output_packets0.empty()); + EXPECT_EQ(Timestamp(expected_timestamp), output_packets0.back().Timestamp()); + EXPECT_EQ(expected_value0, output_packets0.back().Get()); + ASSERT_FALSE(output_packets1.empty()); + EXPECT_EQ(Timestamp(expected_timestamp), output_packets1.back().Timestamp()); + EXPECT_EQ(expected_value1, output_packets1.back().Get()); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index 7efb4a011..3cefe9439 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -630,3 +630,34 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "image_file_properties_calculator", + srcs = ["image_file_properties_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_file_properties_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@easyexif", + ], + alwayslink = 1, +) + +cc_test( + name = "image_file_properties_calculator_test", + srcs = ["image_file_properties_calculator_test.cc"], + data = ["//mediapipe/calculators/image/testdata:test_images"], + deps = [ + ":image_file_properties_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image_file_properties_cc_proto", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + ], +) diff --git a/mediapipe/calculators/image/image_file_properties_calculator.cc b/mediapipe/calculators/image/image_file_properties_calculator.cc new file mode 100644 index 000000000..82af9ef8a --- /dev/null +++ b/mediapipe/calculators/image/image_file_properties_calculator.cc @@ -0,0 +1,195 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "exif.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_file_properties.pb.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +namespace { + +// 35 MM sensor has dimensions 36 mm x 24 mm, so diagonal length is +// sqrt(36^2 + 24^2). 
+static const double SENSOR_DIAGONAL_35MM = std::sqrt(1872.0); + +::mediapipe::StatusOr ComputeFocalLengthInPixels( + int image_width, int image_height, double focal_length_35mm, + double focal_length_mm) { + // TODO: Allow returning image file properties even when focal length + // computation is not possible. + if (image_width == 0 || image_height == 0) { + return ::mediapipe::InternalError( + "Image dimensions should be non-zero to compute focal length in " + "pixels."); + } + if (focal_length_mm == 0) { + return ::mediapipe::InternalError( + "Focal length in mm should be non-zero to compute focal length in " + "pixels."); + } + if (focal_length_35mm == 0) { + return ::mediapipe::InternalError( + "Focal length in 35 mm should be non-zero to compute focal length in " + "pixels."); + } + // Derived from + // https://en.wikipedia.org/wiki/35_mm_equivalent_focal_length#Calculation. + /// Using focal_length_35mm = focal_length_mm * SENSOR_DIAGONAL_35MM / + /// sensor_diagonal_mm, we can calculate the diagonal length of the sensor in + /// millimeters i.e. sensor_diagonal_mm. + double sensor_diagonal_mm = + SENSOR_DIAGONAL_35MM / focal_length_35mm * focal_length_mm; + // Note that for the following computations, the longer dimension is treated + // as image width and the shorter dimension is treated as image height. + int width = image_width; + int height = image_height; + if (image_height > image_width) { + width = image_height; + height = image_width; + } + double inv_aspect_ratio = (double)height / width; + // Compute sensor width. + /// Using Pythagoras theorem, sensor_width^2 + sensor_height^2 = + /// sensor_diagonal_mm^2. We can substitute sensor_width / sensor_height with + /// the aspect ratio calculated in pixels to compute the sensor width. + double sensor_width = std::sqrt((sensor_diagonal_mm * sensor_diagonal_mm) / + (1.0 + inv_aspect_ratio * inv_aspect_ratio)); + + // Compute focal length in pixels. + double focal_length_pixels = width * focal_length_mm / sensor_width; + return focal_length_pixels; +} + +::mediapipe::StatusOr GetImageFileProperites( + const std::string& image_bytes) { + easyexif::EXIFInfo result; + int code = result.parseFrom(image_bytes); + if (code) { + return ::mediapipe::InternalError("Error parsing EXIF, code: " + + std::to_string(code)); + } + + ImageFileProperties properties; + properties.set_image_width(result.ImageWidth); + properties.set_image_height(result.ImageHeight); + properties.set_focal_length_mm(result.FocalLength); + properties.set_focal_length_35mm(result.FocalLengthIn35mm); + + ASSIGN_OR_RETURN(auto focal_length_pixels, + ComputeFocalLengthInPixels(properties.image_width(), + properties.image_height(), + properties.focal_length_35mm(), + properties.focal_length_mm())); + properties.set_focal_length_pixels(focal_length_pixels); + + return properties; +} + +} // namespace + +// Calculator to extract EXIF information from an image file. The input is +// a std::string containing raw byte data from a file, and the output is an +// ImageFileProperties proto object with the relevant fields filled in. +// The calculator accepts the input as a stream or a side packet, and can output +// the result as a stream or a side packet. The calculator checks that if an +// output stream is present, it outputs to that stream, and if not, it checks if +// it can output to a side packet. 
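// Worked example of the focal length computation above, using the numbers
// from the accompanying unit test (a 2448x3264 Pixel 2 front-camera JPEG with
// focal_length_mm = 3.38 and focal_length_35mm = 25); the longer dimension is
// treated as the width and values are rounded:
//   sensor_diagonal_mm  = 43.267 / 25 * 3.38            ~= 5.850 mm
//   inv_aspect_ratio    = 2448 / 3264                    = 0.75
//   sensor_width        = sqrt(5.850^2 / (1 + 0.75^2))  ~= 4.680 mm
//   focal_length_pixels = 3264 * 3.38 / 4.680           ~= 2357.48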
+// +// Example config with input and output streams: +// node { +// calculator: "ImageFilePropertiesCalculator" +// input_stream: "image_bytes" +// output_stream: "image_properties" +// } +// Example config with input and output side packets: +// node { +// calculator: "ImageFilePropertiesCalculator" +// input_side_packet: "image_bytes" +// output_side_packet: "image_properties" +// } +class ImageFilePropertiesCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + if (cc->Inputs().NumEntries() != 0) { + RET_CHECK(cc->Inputs().NumEntries() == 1); + cc->Inputs().Index(0).Set(); + } else { + RET_CHECK(cc->InputSidePackets().NumEntries() == 1); + cc->InputSidePackets().Index(0).Set(); + } + if (cc->Outputs().NumEntries() != 0) { + RET_CHECK(cc->Outputs().NumEntries() == 1); + cc->Outputs().Index(0).Set<::mediapipe::ImageFileProperties>(); + } else { + RET_CHECK(cc->OutputSidePackets().NumEntries() == 1); + cc->OutputSidePackets().Index(0).Set<::mediapipe::ImageFileProperties>(); + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + + if (cc->InputSidePackets().NumEntries() == 1) { + const std::string& image_bytes = + cc->InputSidePackets().Index(0).Get(); + ASSIGN_OR_RETURN(properties_, GetImageFileProperites(image_bytes)); + read_properties_ = true; + } + + if (read_properties_ && cc->OutputSidePackets().NumEntries() == 1) { + cc->OutputSidePackets().Index(0).Set( + MakePacket(properties_)); + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (cc->Inputs().NumEntries() == 1) { + if (cc->Inputs().Index(0).IsEmpty()) { + return ::mediapipe::OkStatus(); + } + const std::string& image_bytes = cc->Inputs().Index(0).Get(); + ASSIGN_OR_RETURN(properties_, GetImageFileProperites(image_bytes)); + read_properties_ = true; + } + if (read_properties_) { + if (cc->Outputs().NumEntries() == 1) { + cc->Outputs().Index(0).AddPacket( + MakePacket(properties_) + .At(cc->InputTimestamp())); + } else { + cc->OutputSidePackets().Index(0).Set( + MakePacket(properties_) + .At(::mediapipe::Timestamp::Unset())); + } + } + + return ::mediapipe::OkStatus(); + } + + private: + ImageFileProperties properties_; + bool read_properties_ = false; +}; +REGISTER_CALCULATOR(ImageFilePropertiesCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/image/image_file_properties_calculator_test.cc b/mediapipe/calculators/image/image_file_properties_calculator_test.cc new file mode 100644 index 000000000..954f095d6 --- /dev/null +++ b/mediapipe/calculators/image/image_file_properties_calculator_test.cc @@ -0,0 +1,134 @@ +// Copyright 2018 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image_file_properties.pb.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" + +namespace mediapipe { + +namespace { + +constexpr char kImageFilePath[] = + "/mediapipe/calculators/image/testdata/" + "front_camera_pixel2.jpg"; +constexpr int kExpectedWidth = 2448; +constexpr int kExpectedHeight = 3264; +constexpr double kExpectedFocalLengthMm = 3.38; +constexpr double kExpectedFocalLengthIn35Mm = 25; +constexpr double kExpectedFocalLengthPixels = 2357.48; + +double RoundToNDecimals(double value, int n) { + return std::round(value * pow(10.0, n)) / pow(10.0, n); +} + +TEST(ImageFilePropertiesCalculatorTest, ReadsFocalLengthFromJpegInStreams) { + std::string image_filepath = file::JoinPath("./", kImageFilePath); + std::string image_contents; + MP_ASSERT_OK(file::GetContents(image_filepath, &image_contents)); + + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "ImageFilePropertiesCalculator" + input_stream: "image_bytes" + output_stream: "properties" + )"); + + CalculatorRunner runner(node_config); + runner.MutableInputs()->Index(0).packets.push_back( + MakePacket(image_contents).At(Timestamp(0))); + MP_ASSERT_OK(runner.Run()); + const auto& outputs = runner.Outputs(); + ASSERT_EQ(1, outputs.NumEntries()); + const std::vector& packets = outputs.Index(0).packets; + ASSERT_EQ(1, packets.size()); + const auto& result = packets[0].Get<::mediapipe::ImageFileProperties>(); + EXPECT_EQ(kExpectedWidth, result.image_width()); + EXPECT_EQ(kExpectedHeight, result.image_height()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthMm, result.focal_length_mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthIn35Mm, result.focal_length_35mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthPixels, + RoundToNDecimals(result.focal_length_pixels(), /*n=*/2)); +} + +TEST(ImageFilePropertiesCalculatorTest, ReadsFocalLengthFromJpegInSidePackets) { + std::string image_filepath = file::JoinPath("./", kImageFilePath); + std::string image_contents; + MP_ASSERT_OK(file::GetContents(image_filepath, &image_contents)); + + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "ImageFilePropertiesCalculator" + input_side_packet: "image_bytes" + output_side_packet: "properties" + )"); + + CalculatorRunner runner(node_config); + runner.MutableSidePackets()->Index(0) = + MakePacket(image_contents).At(Timestamp(0)); + MP_ASSERT_OK(runner.Run()); + const auto& outputs = runner.OutputSidePackets(); + EXPECT_EQ(1, outputs.NumEntries()); + const auto& packet = outputs.Index(0); + const auto& result = packet.Get<::mediapipe::ImageFileProperties>(); + EXPECT_EQ(kExpectedWidth, result.image_width()); + EXPECT_EQ(kExpectedHeight, result.image_height()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthMm, result.focal_length_mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthIn35Mm, result.focal_length_35mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthPixels, + RoundToNDecimals(result.focal_length_pixels(), /*n=*/2)); +} + +TEST(ImageFilePropertiesCalculatorTest, + ReadsFocalLengthFromJpegStreamToSidePacket) { + std::string image_filepath = file::JoinPath("./", kImageFilePath); + 
std::string image_contents; + MP_ASSERT_OK(file::GetContents(image_filepath, &image_contents)); + + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "ImageFilePropertiesCalculator" + input_stream: "image_bytes" + output_side_packet: "properties" + )"); + + CalculatorRunner runner(node_config); + runner.MutableInputs()->Index(0).packets.push_back( + MakePacket(image_contents).At(Timestamp(0))); + MP_ASSERT_OK(runner.Run()); + const auto& outputs = runner.OutputSidePackets(); + EXPECT_EQ(1, outputs.NumEntries()); + const auto& packet = outputs.Index(0); + const auto& result = packet.Get<::mediapipe::ImageFileProperties>(); + EXPECT_EQ(kExpectedWidth, result.image_width()); + EXPECT_EQ(kExpectedHeight, result.image_height()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthMm, result.focal_length_mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthIn35Mm, result.focal_length_35mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthPixels, + RoundToNDecimals(result.focal_length_pixels(), /*n=*/2)); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/image/image_transformation_calculator.cc b/mediapipe/calculators/image/image_transformation_calculator.cc index cb5f6419e..37539d814 100644 --- a/mediapipe/calculators/image/image_transformation_calculator.cc +++ b/mediapipe/calculators/image/image_transformation_calculator.cc @@ -449,19 +449,15 @@ REGISTER_CALCULATOR(ImageTransformationCalculator); switch (rotation_) { case mediapipe::RotationMode_Mode_UNKNOWN: case mediapipe::RotationMode_Mode_ROTATION_0: - LOG(ERROR) << "Not rotating image."; rotated_mat = input_mat; break; case mediapipe::RotationMode_Mode_ROTATION_90: - LOG(ERROR) << "Rotating image by 90 degrees ccw."; cv::rotate(input_mat, rotated_mat, cv::ROTATE_90_COUNTERCLOCKWISE); break; case mediapipe::RotationMode_Mode_ROTATION_180: - LOG(ERROR) << "Rotating image by 180 degrees."; cv::rotate(input_mat, rotated_mat, cv::ROTATE_180); break; case mediapipe::RotationMode_Mode_ROTATION_270: - LOG(ERROR) << "Rotating image by 90 degrees cw."; cv::rotate(input_mat, rotated_mat, cv::ROTATE_90_CLOCKWISE); break; } diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index ea3fcc715..f934bd5a4 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -57,22 +57,6 @@ proto_library( deps = ["//mediapipe/framework:calculator_proto"], ) -proto_library( - name = "tensorflow_session_from_saved_model_generator_proto", - srcs = ["tensorflow_session_from_saved_model_generator.proto"], - visibility = ["//visibility:public"], - deps = ["//mediapipe/framework:packet_generator_proto"], -) - -proto_library( - name = "tensorflow_session_from_saved_model_calculator_proto", - srcs = ["tensorflow_session_from_saved_model_calculator.proto"], - visibility = ["//visibility:public"], - deps = [ - "//mediapipe/framework:calculator_proto", - ], -) - proto_library( name = "tensor_squeeze_dimensions_calculator_proto", srcs = ["tensor_squeeze_dimensions_calculator.proto"], @@ -212,7 +196,10 @@ mediapipe_cc_proto_library( mediapipe_cc_proto_library( name = "tensorflow_session_from_saved_model_generator_cc_proto", srcs = ["tensorflow_session_from_saved_model_generator.proto"], - cc_deps = ["//mediapipe/framework:packet_generator_cc_proto"], + cc_deps = [ + "//mediapipe/framework:packet_generator_cc_proto", + "@org_tensorflow//tensorflow/core:protos_all_cc", + ], visibility = ["//visibility:public"], deps = 
[":tensorflow_session_from_saved_model_generator_proto"], ) @@ -220,7 +207,10 @@ mediapipe_cc_proto_library( mediapipe_cc_proto_library( name = "tensorflow_session_from_saved_model_calculator_cc_proto", srcs = ["tensorflow_session_from_saved_model_calculator.proto"], - cc_deps = ["//mediapipe/framework:calculator_cc_proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "@org_tensorflow//tensorflow/core:protos_all_cc", + ], visibility = ["//visibility:public"], deps = [":tensorflow_session_from_saved_model_calculator_proto"], ) @@ -488,6 +478,8 @@ cc_library( "//mediapipe/calculators/tensorflow:tensorflow_session_from_frozen_graph_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/tool:status_util", + "//mediapipe/framework/deps:clock", + "//mediapipe/framework/port:logging", "//mediapipe/framework/port:status", "//mediapipe/framework/port:ret_check", ] + select({ @@ -518,6 +510,8 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework/tool:status_util", "//mediapipe/framework/port:status", + "//mediapipe/framework/deps:clock", + "//mediapipe/framework/port:logging", "//mediapipe/framework/port:ret_check", ] + select({ "//conditions:default": [ @@ -929,6 +923,7 @@ cc_test( "@com_google_absl//absl/strings", "@org_tensorflow//tensorflow/core:all_kernels", "@org_tensorflow//tensorflow/core:direct_session", + "@org_tensorflow//tensorflow/core:protos_all_cc", ], ) @@ -954,6 +949,7 @@ cc_test( "@com_google_absl//absl/strings", "@org_tensorflow//tensorflow/core:all_kernels", "@org_tensorflow//tensorflow/core:direct_session", + "@org_tensorflow//tensorflow/core:protos_all_cc", ], ) diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc index 53f6b70f8..7975d4c9d 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.cc @@ -26,9 +26,14 @@ #include "mediapipe/calculators/tensorflow/tensorflow_session.h" #include "mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/deps/clock.h" +#include "mediapipe/framework/deps/monotonic_clock.h" +#include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/tool/status_util.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/public/session_options.h" #if defined(MEDIAPIPE_MOBILE) @@ -41,6 +46,17 @@ namespace mediapipe { namespace tf = ::tensorflow; +namespace { +// Updates the graph nodes to use the device as specified by device_id. 
+void SetPreferredDevice(tf::GraphDef* graph_def, absl::string_view device_id) { + for (auto& node : *graph_def->mutable_node()) { + if (node.device().empty()) { + node.set_device(device_id); + } + } +} +} // namespace + class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase { public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { @@ -77,6 +93,9 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase { } ::mediapipe::Status Open(CalculatorContext* cc) override { + auto clock = std::unique_ptr( + mediapipe::MonotonicClock::CreateSynchronizedMonotonicClock()); + const uint64 start_time = absl::ToUnixMicros(clock->TimeNow()); const auto& options = cc->Options(); // Output bundle packet. @@ -108,6 +127,12 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase { tensorflow::GraphDef graph_def; RET_CHECK(graph_def.ParseFromString(graph_def_serialized)); + + // Update the graph nodes to use the preferred device, if set. + if (!options.preferred_device_id().empty()) { + SetPreferredDevice(&graph_def, options.preferred_device_id()); + } + const tf::Status tf_status = session->session->Create(graph_def); RET_CHECK(tf_status.ok()) << "Create failed: " << tf_status.ToString(); @@ -123,6 +148,9 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase { } cc->OutputSidePackets().Tag("SESSION").Set(Adopt(session.release())); + const uint64 end_time = absl::ToUnixMicros(clock->TimeNow()); + LOG(INFO) << "Loaded frozen model in: " << end_time - start_time + << " microseconds."; return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.proto b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.proto index 3921d2016..87b2304ad 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.proto +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator.proto @@ -69,4 +69,12 @@ message TensorFlowSessionFromFrozenGraphCalculatorOptions { // Graph nodes to run to initialize the model. Any output of these ops is // ignored. repeated string initialization_op_names = 4; + + // The id of the device you would prefer to execute the graph nodes on. + // If set, all graph nodes without a previously specified device, will be set + // to run on preferred_device_id. Example values include: + // ["/device:GPU:0","/device:CPU:0", ...] + // NOTE: If config.allow_soft_placement = false, and the device is not found, + // an error will be thrown. 
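// From C++ the option can be set on the calculator options, mirroring the
// accompanying unit test: options.set_preferred_device_id("/device:CPU:0");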
+ optional string preferred_device_id = 5; } diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator_test.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator_test.cc index c2b774278..5277eb348 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator_test.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_calculator_test.cc @@ -66,6 +66,7 @@ class TensorFlowSessionFromFrozenGraphCalculatorTest : public ::testing::Test { (*calculator_options_->mutable_tag_to_tensor_names())["B"] = "b:0"; calculator_options_->mutable_config()->set_intra_op_parallelism_threads(1); calculator_options_->mutable_config()->set_inter_op_parallelism_threads(2); + calculator_options_->set_preferred_device_id("/device:CPU:0"); } void VerifySignatureMap(const TensorFlowSession& session) { diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.cc index 4f71336f2..0cb4a70b5 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.cc @@ -27,16 +27,32 @@ #include "mediapipe/calculators/tensorflow/tensorflow_session.h" #include "mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.pb.h" #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/deps/clock.h" +#include "mediapipe/framework/deps/monotonic_clock.h" #include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/tool/status_util.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/public/session_options.h" namespace mediapipe { namespace tf = ::tensorflow; +namespace { +// Updates the graph nodes to use the device as specified by device_id. +void SetPreferredDevice(tf::GraphDef* graph_def, absl::string_view device_id) { + for (auto& node : *graph_def->mutable_node()) { + if (node.device().empty()) { + node.set_device(device_id); + } + } +} +} // namespace + class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator { public: static ::mediapipe::Status FillExpectations( @@ -77,6 +93,9 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator { static ::mediapipe::Status Generate( const PacketGeneratorOptions& packet_generator_options, const PacketSet& input_side_packets, PacketSet* output_side_packets) { + auto clock = std::unique_ptr( + mediapipe::MonotonicClock::CreateSynchronizedMonotonicClock()); + const uint64 start_time = absl::ToUnixMicros(clock->TimeNow()); const TensorFlowSessionFromFrozenGraphGeneratorOptions& options = packet_generator_options.GetExtension( TensorFlowSessionFromFrozenGraphGeneratorOptions::ext); @@ -108,6 +127,12 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator { tensorflow::GraphDef graph_def; RET_CHECK(graph_def.ParseFromString(graph_def_serialized)); + + // Update the graph nodes to use the preferred device, if set. 
+ if (!options.preferred_device_id().empty()) { + SetPreferredDevice(&graph_def, options.preferred_device_id()); + } + const tf::Status tf_status = session->session->Create(graph_def); RET_CHECK(tf_status.ok()) << "Create failed: " << tf_status.ToString(); @@ -123,6 +148,9 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator { } output_side_packets->Tag("SESSION") = Adopt(session.release()); + const uint64 end_time = absl::ToUnixMicros(clock->TimeNow()); + LOG(INFO) << "Loaded frozen model in: " << end_time - start_time + << " microseconds."; return ::mediapipe::OkStatus(); } }; diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.proto b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.proto index 183b5a5a5..4643b4d60 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.proto +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator.proto @@ -69,4 +69,12 @@ message TensorFlowSessionFromFrozenGraphGeneratorOptions { // Graph nodes to run to initialize the model. Any output of these ops is // ignored. repeated string initialization_op_names = 4; + + // The id of the device you would prefer to execute the graph nodes on. + // If set, all graph nodes without a previously specified device, will be set + // to run on preferred_device_id. Example values include: + // ["/device:GPU:0","/device:CPU:0", ...] + // NOTE: If config.allow_soft_placement = false, and the device is not found, + // an error will be thrown. + optional string preferred_device_id = 5; } diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator_test.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator_test.cc index d11007299..e2b968217 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator_test.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_frozen_graph_generator_test.cc @@ -66,6 +66,7 @@ class TensorFlowSessionFromFrozenGraphGeneratorTest : public ::testing::Test { (*generator_options_->mutable_tag_to_tensor_names())["B"] = "b:0"; generator_options_->mutable_config()->set_intra_op_parallelism_threads(1); generator_options_->mutable_config()->set_inter_op_parallelism_threads(2); + generator_options_->set_preferred_device_id("/device:CPU:0"); } void VerifySignatureMap(PacketSet* output_side_packets) { diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc index b54976478..55709bcd9 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc @@ -134,8 +134,8 @@ class TensorFlowSessionFromSavedModelCalculator : public CalculatorBase { } tensorflow::RunOptions run_options; - // In the future, could construct session options from the options proto. 
tensorflow::SessionOptions session_options; + session_options.config = options.session_config(); auto saved_model = absl::make_unique(); ::tensorflow::Status status = tensorflow::LoadSavedModel( session_options, run_options, path, tags_set, saved_model.get()); diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.proto b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.proto index 66e03d893..a8839ef52 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.proto +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.proto @@ -17,6 +17,7 @@ syntax = "proto2"; package mediapipe; import "mediapipe/framework/calculator.proto"; +import "tensorflow/core/protobuf/config.proto"; message TensorFlowSessionFromSavedModelCalculatorOptions { extend mediapipe.CalculatorOptions { @@ -55,4 +56,7 @@ message TensorFlowSessionFromSavedModelCalculatorOptions { // If no tag is specified, then use "serve" as the default. Note that in order // to use TPU accelerator hardware, the tag "tpu" needs to be specified. repeated string saved_model_tag = 6; + + // Tensorflow session config options. + optional tensorflow.ConfigProto session_config = 7; } diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator_test.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator_test.cc index fee0da0fb..d6064d862 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator_test.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator_test.cc @@ -26,6 +26,7 @@ #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/tool/tag_map_helper.h" #include "mediapipe/framework/tool/validate_type.h" +#include "tensorflow/core/framework/device_attributes.pb.h" namespace mediapipe { @@ -204,5 +205,31 @@ TEST_F(TensorFlowSessionFromSavedModelCalculatorTest, ASSERT_NE(session.session, nullptr); } +TEST_F(TensorFlowSessionFromSavedModelCalculatorTest, + ConfiguresSessionGivenConfig) { + options_->set_saved_model_path( + std::string(file::SplitPath(GetSavedModelDir()).first)); + options_->set_load_latest_model(true); + options_->mutable_session_config()->mutable_device_count()->insert( + {"CPU", 10}); + CalculatorRunner runner(absl::Substitute(R"( + calculator: "TensorFlowSessionFromSavedModelCalculator" + output_side_packet: "SESSION:tf_model" + options { + [mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions.ext]: { + $0 + } + })", + options_->DebugString())); + MP_ASSERT_OK(runner.Run()); + const TensorFlowSession& session = + runner.OutputSidePackets().Tag("SESSION").Get(); + // Session must be set. 
+ ASSERT_NE(session.session, nullptr); + std::vector devices; + ASSERT_EQ(session.session->ListDevices(&devices), tensorflow::Status::OK()); + EXPECT_THAT(devices.size(), 10); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc index 6e1a29e59..73ffc6497 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc @@ -129,8 +129,8 @@ class TensorFlowSessionFromSavedModelGenerator : public PacketGenerator { } tensorflow::RunOptions run_options; - // In the future, could construct session options from the options proto. tensorflow::SessionOptions session_options; + session_options.config = options.session_config(); auto saved_model = absl::make_unique(); ::tensorflow::Status status = tensorflow::LoadSavedModel( session_options, run_options, path, tags_set, saved_model.get()); diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.proto b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.proto index 2dab09242..88ce93435 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.proto +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.proto @@ -17,6 +17,7 @@ syntax = "proto2"; package mediapipe; import "mediapipe/framework/packet_generator.proto"; +import "tensorflow/core/protobuf/config.proto"; message TensorFlowSessionFromSavedModelGeneratorOptions { extend mediapipe.PacketGeneratorOptions { @@ -55,4 +56,7 @@ message TensorFlowSessionFromSavedModelGeneratorOptions { // If no tag is specified, then use "serve" as the default. Note that in order // to use TPU accelerator hardware, the tag "tpu" needs to be specified. repeated string saved_model_tag = 6; + + // Tensorflow session config options. 
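// From C++ this can be populated like any embedded message; e.g. the
// accompanying test requests ten CPU devices with:
//   options.mutable_session_config()->mutable_device_count()->insert({"CPU", 10});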
+ optional tensorflow.ConfigProto session_config = 9; } diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator_test.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator_test.cc index d12fee12a..792c3841b 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator_test.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator_test.cc @@ -25,6 +25,7 @@ #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/tool/tag_map_helper.h" #include "mediapipe/framework/tool/validate_type.h" +#include "tensorflow/core/framework/device_attributes.pb.h" namespace mediapipe { @@ -196,5 +197,29 @@ TEST_F(TensorFlowSessionFromSavedModelGeneratorTest, ASSERT_NE(session.session, nullptr); } +TEST_F(TensorFlowSessionFromSavedModelGeneratorTest, + ConfiguresSessionGivenConfig) { + generator_options_->set_saved_model_path( + std::string(file::SplitPath(GetSavedModelDir()).first)); + generator_options_->set_load_latest_model(true); + generator_options_->mutable_session_config()->mutable_device_count()->insert( + {"CPU", 10}); + + PacketSet input_side_packets(tool::CreateTagMap({}).ValueOrDie()); + PacketSet output_side_packets( + tool::CreateTagMap({"SESSION:session"}).ValueOrDie()); + ::mediapipe::Status run_status = tool::RunGenerateAndValidateTypes( + "TensorFlowSessionFromSavedModelGenerator", extendable_options_, + input_side_packets, &output_side_packets); + MP_EXPECT_OK(run_status) << run_status.message(); + const TensorFlowSession& session = + output_side_packets.Tag("SESSION").Get(); + // Session must be set. + ASSERT_NE(session.session, nullptr); + std::vector devices; + ASSERT_EQ(session.session->ListDevices(&devices), tensorflow::Status::OK()); + EXPECT_THAT(devices.size(), 10); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc b/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc index 068be5714..f7c041788 100644 --- a/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc @@ -91,11 +91,11 @@ REGISTER_CALCULATOR(VectorFloatToTensorCalculator); cc->Inputs().Index(0).Value().Get>>(); const int32 rows = input.size(); - CHECK_GE(rows, 1); + RET_CHECK_GE(rows, 1); const int32 cols = input[0].size(); - CHECK_GE(cols, 1); + RET_CHECK_GE(cols, 1); for (int i = 1; i < rows; ++i) { - CHECK_EQ(input[i].size(), cols); + RET_CHECK_EQ(input[i].size(), cols); } if (options_.transpose()) { tensor_shape = tf::TensorShape({cols, rows}); @@ -116,7 +116,7 @@ REGISTER_CALCULATOR(VectorFloatToTensorCalculator); } else if (options_.input_size() == INPUT_1D) { const std::vector& input = cc->Inputs().Index(0).Value().Get>(); - CHECK_GE(input.size(), 1); + RET_CHECK_GE(input.size(), 1); const int32 length = input.size(); tensor_shape = tf::TensorShape({length}); auto output = ::absl::make_unique(tf::DT_FLOAT, tensor_shape); diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index f1101a009..2c4bb637b 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -196,13 +196,6 @@ cc_test( ], ) -cc_library( - name = "util", - hdrs = ["util.h"], - visibility = ["//visibility:public"], - alwayslink = 1, -) - selects.config_setting_group( name = "gpu_inference_disabled", match_any = [ @@ -229,7 +222,6 @@ cc_library( }), 
visibility = ["//visibility:public"], deps = [ - ":util", ":tflite_inference_calculator_cc_proto", "@com_google_absl//absl/memory", "//mediapipe/framework:calculator_framework", @@ -295,7 +287,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//mediapipe/util/tflite:config", - ":util", ":tflite_converter_calculator_cc_proto", "//mediapipe/util:resource_util", "//mediapipe/framework:calculator_framework", @@ -334,7 +325,6 @@ cc_library( srcs = ["tflite_model_calculator.cc"], visibility = ["//visibility:public"], deps = [ - ":util", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:packet", "//mediapipe/framework/port:ret_check", @@ -348,7 +338,6 @@ cc_library( srcs = ["tflite_tensors_to_segmentation_calculator.cc"], visibility = ["//visibility:public"], deps = [ - ":util", ":tflite_tensors_to_segmentation_calculator_cc_proto", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:span", @@ -418,7 +407,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//mediapipe/util/tflite:config", - ":util", ":tflite_tensors_to_detections_calculator_cc_proto", "//mediapipe/framework/formats:detection_cc_proto", "@com_google_absl//absl/strings:str_format", @@ -551,6 +539,7 @@ cc_test( "//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/tool:validate_type", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", "@org_tensorflow//tensorflow/lite:framework", ], ) diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.cc b/mediapipe/calculators/tflite/tflite_converter_calculator.cc index 6a3011141..e81354242 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.cc @@ -16,7 +16,6 @@ #include #include "mediapipe/calculators/tflite/tflite_converter_calculator.pb.h" -#include "mediapipe/calculators/tflite/util.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/matrix.h" @@ -146,8 +145,7 @@ class TfLiteConverterCalculator : public CalculatorBase { ::mediapipe::Status LoadOptions(CalculatorContext* cc); template ::mediapipe::Status NormalizeImage(const ImageFrame& image_frame, - bool zero_center, bool flip_vertically, - float* tensor_ptr); + bool flip_vertically, float* tensor_ptr); ::mediapipe::Status CopyMatrixToTensor(const Matrix& matrix, float* tensor_ptr); ::mediapipe::Status ProcessCPU(CalculatorContext* cc); @@ -165,10 +163,7 @@ class TfLiteConverterCalculator : public CalculatorBase { bool initialized_ = false; bool use_gpu_ = false; - bool zero_center_ = true; // normalize range to [-1,1] | otherwise [0,1] - bool use_custom_normalization_ = false; - float custom_div_ = -1.0f; - float custom_sub_ = -1.0f; + absl::optional> output_range_; bool flip_vertically_ = false; bool row_major_matrix_ = false; bool use_quantized_tensors_ = false; @@ -362,11 +357,11 @@ bool ShouldUseGpu(CC* cc) { float* tensor_buffer = tensor->data.f; RET_CHECK(tensor_buffer); if (image_frame.ByteDepth() == 1) { - MP_RETURN_IF_ERROR(NormalizeImage( - image_frame, zero_center_, flip_vertically_, tensor_buffer)); + MP_RETURN_IF_ERROR(NormalizeImage(image_frame, flip_vertically_, + tensor_buffer)); } else if (image_frame.ByteDepth() == 4) { - MP_RETURN_IF_ERROR(NormalizeImage( - image_frame, zero_center_, flip_vertically_, tensor_buffer)); + MP_RETURN_IF_ERROR(NormalizeImage(image_frame, flip_vertically_, + tensor_buffer)); } else { return 
::mediapipe::InternalError( "Only byte-based (8 bit) and float (32 bit) images supported."); @@ -427,11 +422,11 @@ bool ShouldUseGpu(CC* cc) { auto src = gpu_helper_.CreateSourceTexture(input); glActiveTexture(GL_TEXTURE0 + 0); glBindTexture(GL_TEXTURE_2D, src.name()); - RET_CHECK_CALL(gpu_data_out_->buffer.BindToIndex(1)); + MP_RETURN_IF_ERROR(gpu_data_out_->buffer.BindToIndex(1)); const tflite::gpu::uint3 workgroups = { NumGroups(input.width(), kWorkgroupSize), NumGroups(input.height(), kWorkgroupSize), 1}; - RET_CHECK_CALL(gpu_data_out_->program.Dispatch(workgroups)); + MP_RETURN_IF_ERROR(gpu_data_out_->program.Dispatch(workgroups)); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); glBindTexture(GL_TEXTURE_2D, 0); src.Release(); @@ -445,9 +440,9 @@ bool ShouldUseGpu(CC* cc) { output_tensors->resize(1); { GpuTensor& tensor = output_tensors->at(0); - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( gpu_data_out_->elements, &tensor)); - RET_CHECK_CALL(CopyBuffer(gpu_data_out_->buffer, tensor)); + MP_RETURN_IF_ERROR(CopyBuffer(gpu_data_out_->buffer, tensor)); } return ::mediapipe::OkStatus(); })); @@ -521,7 +516,7 @@ bool ShouldUseGpu(CC* cc) { MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &include_alpha, &input, &single_channel]() -> ::mediapipe::Status { // Device memory. - RET_CHECK_CALL( + MP_RETURN_IF_ERROR( ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer( gpu_data_out_->elements, &gpu_data_out_->buffer)); @@ -544,7 +539,13 @@ bool ShouldUseGpu(CC* cc) { $6 // alpha channel })", /*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(), - /*$3=*/zero_center_ ? "pixel = (pixel - 0.5) * 2.0;" : "", + /*$3=*/ + output_range_.has_value() + ? absl::Substitute( + "pixel = pixel * float($0) + float($1);", + (output_range_->second - output_range_->first), + output_range_->first) + : "", /*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y", /*$5=*/ single_channel @@ -555,10 +556,10 @@ bool ShouldUseGpu(CC* cc) { include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;" : "", /*$7=*/max_num_channels_); - RET_CHECK_CALL(GlShader::CompileShader(GL_COMPUTE_SHADER, shader_source, - &gpu_data_out_->shader)); - RET_CHECK_CALL(GlProgram::CreateWithShader(gpu_data_out_->shader, - &gpu_data_out_->program)); + MP_RETURN_IF_ERROR(GlShader::CompileShader( + GL_COMPUTE_SHADER, shader_source, &gpu_data_out_->shader)); + MP_RETURN_IF_ERROR(GlProgram::CreateWithShader( + gpu_data_out_->shader, &gpu_data_out_->program)); return ::mediapipe::OkStatus(); })); @@ -599,7 +600,12 @@ bool ShouldUseGpu(CC* cc) { )", /*$0=*/include_alpha ? "float4" : "float3", /*$1=*/include_alpha ? "rgba" : "rgb", - /*$2=*/zero_center_ ? "pixel = (pixel - 0.5) * 2.0;" : "", + /*$2=*/ + output_range_.has_value() + ? absl::Substitute("pixel = pixel * float($0) + float($1);", + (output_range_->second - output_range_->first), + output_range_->first) + : "", /*$3=*/flip_vertically_ ? "(in_tex.get_height() - 1 - gid.y)" : "gid.y", /*$4=*/include_alpha ? 4 : 3, /*$5=*/include_alpha ? "out_buf[linear_index + 3] = pixel.w;" : ""); @@ -630,13 +636,27 @@ bool ShouldUseGpu(CC* cc) { const auto& options = cc->Options<::mediapipe::TfLiteConverterCalculatorOptions>(); - // Get data normalization mode. - zero_center_ = options.zero_center(); + // if zero_center, set output float range to match [-1, 1] as specified in + // calculator proto. 
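// All three normalization options handled below reduce to a single
// [min, max] output range:
//   zero_center:                [-1.0, 1.0]
//   output_tensor_float_range:  [min, max] as given in the options proto
//   use_custom_normalization:   [-custom_sub, -custom_sub + 255.0 / custom_div]
// With a range set, each 8-bit pixel is rescaled on the CPU path as
//   value = pixel / 255.0 * (max - min) + min,
// e.g. a pixel of 200 with range [-1, 1] maps to -1 + 200 * 2 / 255 ~= 0.569;
// if no option is set, the default is plain [0, 1] scaling (pixel / 255.0).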
+ if (options.zero_center()) { + output_range_.emplace(std::pair(-1.0, 1.0)); + } + + // Custom output_tensor_float_range values. + // If the float range is specified in pb text, use the specified values + // instead. + if (options.has_output_tensor_float_range()) { + output_range_.emplace(options.output_tensor_float_range().min(), + options.output_tensor_float_range().max()); + CHECK_GT(output_range_->second, output_range_->first); + } // Custom div and sub values. - use_custom_normalization_ = options.use_custom_normalization(); - custom_div_ = options.custom_div(); - custom_sub_ = options.custom_sub(); + if (options.use_custom_normalization()) { + output_range_.emplace(std::pair( + -options.custom_sub(), + -options.custom_sub() + 255.0 / options.custom_div())); + } // Get y-flip mode. flip_vertically_ = options.flip_vertically(); @@ -664,40 +684,46 @@ bool ShouldUseGpu(CC* cc) { template ::mediapipe::Status TfLiteConverterCalculator::NormalizeImage( - const ImageFrame& image_frame, bool zero_center, bool flip_vertically, - float* tensor_ptr) { + const ImageFrame& image_frame, bool flip_vertically, float* tensor_ptr) { const int height = image_frame.Height(); const int width = image_frame.Width(); const int channels = image_frame.NumberOfChannels(); const int channels_preserved = std::min(channels, max_num_channels_); const int channels_ignored = channels - channels_preserved; - float div, sub; + if (output_range_.has_value()) { + // If the output float range is set and we are not using custom + // normalization, normalize the pixel values from [0, 255] to the specified + // output range. + RET_CHECK_NE(output_range_->first, output_range_->second); + const float scale = (output_range_->second - output_range_->first) / 255.0f; + const float bias = output_range_->first; - if (use_custom_normalization_) { - RET_CHECK_GT(custom_div_, 0.0f); - RET_CHECK_GE(custom_sub_, 0.0f); - div = custom_div_; - sub = custom_sub_; - } else if (zero_center) { - // [-1,1] - div = 127.5f; - sub = 1.0f; - } else { - // [0,1] - div = 255.0f; - sub = 0.0f; - } - - for (int i = 0; i < height; ++i) { - const T* image_ptr = reinterpret_cast( - image_frame.PixelData() + - (flip_vertically ? height - 1 - i : i) * image_frame.WidthStep()); - for (int j = 0; j < width; ++j) { - for (int c = 0; c < channels_preserved; ++c) { - *tensor_ptr++ = *image_ptr++ / div - sub; + for (int i = 0; i < height; ++i) { + const T* image_ptr = reinterpret_cast( + image_frame.PixelData() + + (flip_vertically ? height - 1 - i : i) * image_frame.WidthStep()); + for (int j = 0; j < width; ++j) { + for (int c = 0; c < channels_preserved; ++c) { + *tensor_ptr++ = *image_ptr++ * scale + bias; + } + image_ptr += channels_ignored; + } + } + } else { + // [0,1], scale only (bias == 0) + // Verified that there are no precision issues with 1.0f / 255.0f expression + const float scale = 1.0f / 255.0f; + for (int i = 0; i < height; ++i) { + const T* image_ptr = reinterpret_cast( + image_frame.PixelData() + + (flip_vertically ? 
height - 1 - i : i) * image_frame.WidthStep()); + for (int j = 0; j < width; ++j) { + for (int c = 0; c < channels_preserved; ++c) { + *tensor_ptr++ = *image_ptr++ * scale; + } + image_ptr += channels_ignored; } - image_ptr += channels_ignored; } } diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.proto b/mediapipe/calculators/tflite/tflite_converter_calculator.proto index 4d468c851..5ed70879d 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.proto @@ -56,4 +56,14 @@ message TfLiteConverterCalculatorOptions { // Quantization option (CPU only). // When true, output kTfLiteUInt8 tensor instead of kTfLiteFloat32. optional bool use_quantized_tensors = 5 [default = false]; + + // Normalization option. + // Setting normalization_range results in the values normalized to + // the range [output_tensor_float_range.min, output_tensor_float_range.max]. + optional TensorFloatRange output_tensor_float_range = 9; + + message TensorFloatRange { + optional float min = 1; + optional float max = 2; + } } diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc b/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc index cecf84e6f..c8762b09b 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator_test.cc @@ -16,6 +16,7 @@ #include #include "absl/memory/memory.h" +#include "absl/strings/substitute.h" #include "mediapipe/calculators/tflite/tflite_converter_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_runner.h" @@ -40,6 +41,7 @@ constexpr char kTransposeOptionsString[] = } // namespace using RandomEngine = std::mt19937_64; +using testing::Eq; const uint32 kSeed = 1234; const int kNumSizes = 8; const int sizes[kNumSizes][2] = {{1, 1}, {12, 1}, {1, 9}, {2, 2}, @@ -232,7 +234,6 @@ TEST_F(TfLiteConverterCalculatorTest, CustomDivAndSub) { // Wait until the calculator done processing. MP_ASSERT_OK(graph.WaitUntilIdle()); - EXPECT_EQ(1, output_packets.size()); // Get and process results. const std::vector& tensor_vec = @@ -249,4 +250,70 @@ TEST_F(TfLiteConverterCalculatorTest, CustomDivAndSub) { MP_ASSERT_OK(graph.WaitUntilDone()); } +TEST_F(TfLiteConverterCalculatorTest, SetOutputRange) { + std::vector> range_values = { + std::make_pair(0.0, 1.0), std::make_pair(-1.0, 1.0), + std::make_pair(-0.5, 0.5)}; + for (std::pair range : range_values) { + CalculatorGraph graph; + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + absl::Substitute(R"( + input_stream: "input_image" + node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TfLiteConverterCalculatorOptions.ext] { + output_tensor_float_range { + min: $0 + max: $1 + } + } + } + } + )", + /*$0=*/range.first, + /*$1=*/range.second)); + std::vector output_packets; + tool::AddVectorSink("tensor", &graph_config, &output_packets); + + // Run the graph. + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 1); + cv::Mat mat = ::mediapipe::formats::MatView(input_image.get()); + mat.at(0, 0) = 200; + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", Adopt(input_image.release()).At(Timestamp(0)))); + + // Wait until the calculator finishes processing. 
+ MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_THAT(output_packets.size(), Eq(1)); + + // Get and process results. + const std::vector& tensor_vec = + output_packets[0].Get>(); + EXPECT_THAT(tensor_vec.size(), Eq(1)); + + const TfLiteTensor* tensor = &tensor_vec[0]; + + // Calculate the expected normalized value: + float normalized_value = + range.first + (200 * (range.second - range.first)) / 255.0; + + EXPECT_THAT(tensor->type, Eq(kTfLiteFloat32)); + EXPECT_THAT(normalized_value, + testing::FloatNear(*tensor->data.f, + 2.0f * std::abs(*tensor->data.f) * + std::numeric_limits::epsilon())); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). + MP_ASSERT_OK(graph.CloseInputStream("input_image")); + MP_ASSERT_OK(graph.WaitUntilDone()); + } +} + } // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index cd881102d..8ed8a7ae8 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -19,7 +19,6 @@ #include "absl/memory/memory.h" #include "mediapipe/calculators/tflite/tflite_inference_calculator.pb.h" -#include "mediapipe/calculators/tflite/util.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/util/tflite/config.h" @@ -496,7 +495,7 @@ bool ShouldUseGpu(CC* cc) { output_tensors_gpu->resize(gpu_data_out_.size()); for (int i = 0; i < gpu_data_out_.size(); ++i) { GpuTensor& tensor = output_tensors_gpu->at(i); - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( gpu_data_out_[i]->elements, &tensor)); MP_RETURN_IF_ERROR( tflite_gpu_runner_->BindSSBOToOutputTensor(tensor.id(), i)); @@ -518,7 +517,7 @@ bool ShouldUseGpu(CC* cc) { // Explicit copy input. gpu_data_in_.resize(input_tensors.size()); for (int i = 0; i < input_tensors.size(); ++i) { - RET_CHECK_CALL(CopyBuffer(input_tensors[i], gpu_data_in_[i]->buffer)); + MP_RETURN_IF_ERROR(CopyBuffer(input_tensors[i], gpu_data_in_[i]->buffer)); } #elif MEDIAPIPE_TFLITE_METAL_INFERENCE const auto& input_tensors = @@ -582,7 +581,7 @@ bool ShouldUseGpu(CC* cc) { for (int i = 0; i < tensor_indexes.size(); ++i) { TfLiteTensor* tensor = interpreter_->tensor(tensor_indexes[i]); std::vector gpu_data(tensor->bytes / sizeof(float)); - RET_CHECK_CALL(gpu_data_out_[i]->buffer.Read( + MP_RETURN_IF_ERROR(gpu_data_out_[i]->buffer.Read( absl::MakeSpan(tensor->data.f, tensor->bytes))); output_tensors_cpu->emplace_back(*tensor); } @@ -599,9 +598,9 @@ bool ShouldUseGpu(CC* cc) { for (int i = 0; i < gpu_data_out_.size(); ++i) { GpuTensor& tensor = output_tensors_gpu->at(i); // Allocate output tensor. 
- RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( gpu_data_out_[i]->elements, &tensor)); - RET_CHECK_CALL(CopyBuffer(gpu_data_out_[i]->buffer, tensor)); + MP_RETURN_IF_ERROR(CopyBuffer(gpu_data_out_[i]->buffer, tensor)); } cc->Outputs() .Tag(kTensorsGpuTag) @@ -655,7 +654,8 @@ bool ShouldUseGpu(CC* cc) { options.priority3 = tflite::gpu::InferencePriority::AUTO; options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED; tflite_gpu_runner_ = std::make_unique(options); - RET_CHECK_CALL(tflite_gpu_runner_->InitializeWithModel(model, op_resolver)); + MP_RETURN_IF_ERROR( + tflite_gpu_runner_->InitializeWithModel(model, op_resolver)); // Allocate interpreter memory for cpu output. if (!gpu_output_) { @@ -688,10 +688,11 @@ bool ShouldUseGpu(CC* cc) { ASSIGN_OR_RETURN(gpu_data_out_[i]->elements, tflite_gpu_runner_->GetOutputElements(i)); // Create and bind input buffer. - RET_CHECK_CALL(::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer( - gpu_data_out_[i]->elements, &gpu_data_out_[i]->buffer)); + MP_RETURN_IF_ERROR( + ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer( + gpu_data_out_[i]->elements, &gpu_data_out_[i]->buffer)); } - RET_CHECK_CALL(tflite_gpu_runner_->Build()); + MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build()); #endif // MEDIAPIPE_TFLITE_GL_INFERENCE return ::mediapipe::OkStatus(); @@ -841,7 +842,7 @@ bool ShouldUseGpu(CC* cc) { gpu_data_in_[i]->elements *= tensor->dims->data[d]; } // Create and bind input buffer. - RET_CHECK_CALL( + MP_RETURN_IF_ERROR( ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer( gpu_data_in_[i]->elements, &gpu_data_in_[i]->buffer)); RET_CHECK_EQ(TfLiteGpuDelegateBindBufferToTensor( @@ -866,7 +867,7 @@ bool ShouldUseGpu(CC* cc) { // Create and bind output buffers. interpreter_->SetAllowBufferHandleOutput(true); for (int i = 0; i < gpu_data_out_.size(); ++i) { - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( gpu_data_out_[i]->elements, &gpu_data_out_[i]->buffer)); RET_CHECK_EQ(TfLiteGpuDelegateBindBufferToTensor( delegate_.get(), gpu_data_out_[i]->buffer.id(), diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc index 412c07125..ec07aab98 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc @@ -18,7 +18,6 @@ #include "absl/strings/str_format.h" #include "absl/types/span.h" #include "mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.pb.h" -#include "mediapipe/calculators/tflite/util.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/deps/file_path.h" #include "mediapipe/framework/formats/detection.pb.h" @@ -404,8 +403,10 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); &output_detections]() -> ::mediapipe::Status { // Copy inputs. 
- RET_CHECK_CALL(CopyBuffer(input_tensors[0], gpu_data_->raw_boxes_buffer)); - RET_CHECK_CALL(CopyBuffer(input_tensors[1], gpu_data_->raw_scores_buffer)); + MP_RETURN_IF_ERROR( + CopyBuffer(input_tensors[0], gpu_data_->raw_boxes_buffer)); + MP_RETURN_IF_ERROR( + CopyBuffer(input_tensors[1], gpu_data_->raw_scores_buffer)); if (!anchors_init_) { if (side_packet_anchors_) { CHECK(!cc->InputSidePackets().Tag("ANCHORS").IsEmpty()); @@ -413,11 +414,11 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); cc->InputSidePackets().Tag("ANCHORS").Get>(); std::vector raw_anchors(num_boxes_ * kNumCoordsPerBox); ConvertAnchorsToRawValues(anchors, num_boxes_, raw_anchors.data()); - RET_CHECK_CALL(gpu_data_->raw_anchors_buffer.Write( + MP_RETURN_IF_ERROR(gpu_data_->raw_anchors_buffer.Write( absl::MakeSpan(raw_anchors))); } else { CHECK_EQ(input_tensors.size(), kNumInputTensorsWithAnchors); - RET_CHECK_CALL( + MP_RETURN_IF_ERROR( CopyBuffer(input_tensors[2], gpu_data_->raw_anchors_buffer)); } anchors_init_ = true; @@ -425,23 +426,24 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); // Run shaders. // Decode boxes. - RET_CHECK_CALL(gpu_data_->decoded_boxes_buffer.BindToIndex(0)); - RET_CHECK_CALL(gpu_data_->raw_boxes_buffer.BindToIndex(1)); - RET_CHECK_CALL(gpu_data_->raw_anchors_buffer.BindToIndex(2)); + MP_RETURN_IF_ERROR(gpu_data_->decoded_boxes_buffer.BindToIndex(0)); + MP_RETURN_IF_ERROR(gpu_data_->raw_boxes_buffer.BindToIndex(1)); + MP_RETURN_IF_ERROR(gpu_data_->raw_anchors_buffer.BindToIndex(2)); const tflite::gpu::uint3 decode_workgroups = {num_boxes_, 1, 1}; - RET_CHECK_CALL(gpu_data_->decode_program.Dispatch(decode_workgroups)); + MP_RETURN_IF_ERROR(gpu_data_->decode_program.Dispatch(decode_workgroups)); // Score boxes. - RET_CHECK_CALL(gpu_data_->scored_boxes_buffer.BindToIndex(0)); - RET_CHECK_CALL(gpu_data_->raw_scores_buffer.BindToIndex(1)); + MP_RETURN_IF_ERROR(gpu_data_->scored_boxes_buffer.BindToIndex(0)); + MP_RETURN_IF_ERROR(gpu_data_->raw_scores_buffer.BindToIndex(1)); const tflite::gpu::uint3 score_workgroups = {num_boxes_, 1, 1}; - RET_CHECK_CALL(gpu_data_->score_program.Dispatch(score_workgroups)); + MP_RETURN_IF_ERROR(gpu_data_->score_program.Dispatch(score_workgroups)); // Copy decoded boxes from GPU to CPU. std::vector boxes(num_boxes_ * num_coords_); - RET_CHECK_CALL(gpu_data_->decoded_boxes_buffer.Read(absl::MakeSpan(boxes))); + MP_RETURN_IF_ERROR( + gpu_data_->decoded_boxes_buffer.Read(absl::MakeSpan(boxes))); std::vector score_class_id_pairs(num_boxes_ * 2); - RET_CHECK_CALL(gpu_data_->scored_boxes_buffer.Read( + MP_RETURN_IF_ERROR(gpu_data_->scored_boxes_buffer.Read( absl::MakeSpan(score_class_id_pairs))); // TODO: b/138851969. 
Is it possible to output a float vector @@ -802,20 +804,20 @@ void main() { // Shader program GlShader decode_shader; - RET_CHECK_CALL( + MP_RETURN_IF_ERROR( GlShader::CompileShader(GL_COMPUTE_SHADER, decode_src, &decode_shader)); - RET_CHECK_CALL(GpuProgram::CreateWithShader(decode_shader, - &gpu_data_->decode_program)); + MP_RETURN_IF_ERROR(GpuProgram::CreateWithShader( + decode_shader, &gpu_data_->decode_program)); // Outputs size_t decoded_boxes_length = num_boxes_ * num_coords_; - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( decoded_boxes_length, &gpu_data_->decoded_boxes_buffer)); // Inputs size_t raw_boxes_length = num_boxes_ * num_coords_; - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( raw_boxes_length, &gpu_data_->raw_boxes_buffer)); size_t raw_anchors_length = num_boxes_ * kNumCoordsPerBox; - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( raw_anchors_length, &gpu_data_->raw_anchors_buffer)); // Parameters glUseProgram(gpu_data_->decode_program.id()); @@ -896,17 +898,17 @@ void main() { // Shader program GlShader score_shader; - RET_CHECK_CALL( + MP_RETURN_IF_ERROR( GlShader::CompileShader(GL_COMPUTE_SHADER, score_src, &score_shader)); - RET_CHECK_CALL( + MP_RETURN_IF_ERROR( GpuProgram::CreateWithShader(score_shader, &gpu_data_->score_program)); // Outputs size_t scored_boxes_length = num_boxes_ * 2; // score, class - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( scored_boxes_length, &gpu_data_->scored_boxes_buffer)); // Inputs size_t raw_scores_length = num_boxes_ * num_classes_; - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( raw_scores_length, &gpu_data_->raw_scores_buffer)); return ::mediapipe::OkStatus(); diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc index 23a85276d..3369840e4 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc @@ -17,7 +17,6 @@ #include "absl/strings/str_format.h" #include "absl/types/span.h" #include "mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.pb.h" -#include "mediapipe/calculators/tflite/util.h" #include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_frame.h" @@ -400,7 +399,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); // Create initial working mask texture. ::tflite::gpu::gl::GlTexture small_mask_texture; - RET_CHECK_CALL(CreateReadWriteRgbaImageTexture( + MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture( tflite::gpu::DataType::UINT8, // GL_RGBA8 {tensor_width_, tensor_height_}, &small_mask_texture)); @@ -410,7 +409,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); : mediapipe::GlTexture(); // Copy input tensor. - RET_CHECK_CALL(CopyBuffer(input_tensors[0], *tensor_buffer_)); + MP_RETURN_IF_ERROR(CopyBuffer(input_tensors[0], *tensor_buffer_)); // Run shader, process mask tensor. // Run softmax over tensor output and blend with previous mask. 
@@ -418,18 +417,18 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); const int output_index = 0; glBindImageTexture(output_index, small_mask_texture.id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); - RET_CHECK_CALL(tensor_buffer_->BindToIndex(2)); + MP_RETURN_IF_ERROR(tensor_buffer_->BindToIndex(2)); const tflite::gpu::uint3 workgroups = { NumGroups(tensor_width_, kWorkgroupSize), NumGroups(tensor_height_, kWorkgroupSize), 1}; if (!has_prev_mask) { - RET_CHECK_CALL(mask_program_no_prev_->Dispatch(workgroups)); + MP_RETURN_IF_ERROR(mask_program_no_prev_->Dispatch(workgroups)); } else { glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, input_mask_texture.name()); - RET_CHECK_CALL(mask_program_with_prev_->Dispatch(workgroups)); + MP_RETURN_IF_ERROR(mask_program_with_prev_->Dispatch(workgroups)); glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, 0); } @@ -622,22 +621,22 @@ void main() { // Shader programs. GlShader shader_without_previous; - RET_CHECK_CALL(GlShader::CompileShader( + MP_RETURN_IF_ERROR(GlShader::CompileShader( GL_COMPUTE_SHADER, shader_src_no_previous, &shader_without_previous)); mask_program_no_prev_ = absl::make_unique(); - RET_CHECK_CALL(GlProgram::CreateWithShader(shader_without_previous, - mask_program_no_prev_.get())); + MP_RETURN_IF_ERROR(GlProgram::CreateWithShader( + shader_without_previous, mask_program_no_prev_.get())); GlShader shader_with_previous; - RET_CHECK_CALL(GlShader::CompileShader( + MP_RETURN_IF_ERROR(GlShader::CompileShader( GL_COMPUTE_SHADER, shader_src_with_previous, &shader_with_previous)); mask_program_with_prev_ = absl::make_unique(); - RET_CHECK_CALL(GlProgram::CreateWithShader(shader_with_previous, - mask_program_with_prev_.get())); + MP_RETURN_IF_ERROR(GlProgram::CreateWithShader( + shader_with_previous, mask_program_with_prev_.get())); // Buffer storage for input tensor. size_t tensor_length = tensor_width_ * tensor_height_ * tensor_channels_; tensor_buffer_ = absl::make_unique(); - RET_CHECK_CALL(CreateReadWriteShaderStorageBuffer( + MP_RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer( tensor_length, tensor_buffer_.get())); // Parameters. diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index 7223ad44d..376b608b0 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -700,6 +700,8 @@ mediapipe_cc_proto_library( deps = [":rect_to_render_data_calculator_proto"], ) +# TODO: What is that one for? + mediapipe_cc_proto_library( name = "detections_to_render_data_calculator_cc_proto", srcs = ["detections_to_render_data_calculator.proto"], diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.cc b/mediapipe/calculators/util/annotation_overlay_calculator.cc index 812522f7a..e66bc1095 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.cc +++ b/mediapipe/calculators/util/annotation_overlay_calculator.cc @@ -160,6 +160,8 @@ class AnnotationOverlayCalculator : public CalculatorBase { GLuint image_mat_tex_ = 0; // Overlay drawing image for GPU. int width_ = 0; int height_ = 0; + int width_canvas_ = 0; // Size of overlay drawing texture canvas. + int height_canvas_ = 0; #endif // MEDIAPIPE_DISABLE_GPU }; REGISTER_CALCULATOR(AnnotationOverlayCalculator); @@ -248,6 +250,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); // Initialize the helper renderer library. 
renderer_ = absl::make_unique(); renderer_->SetFlipTextVertically(options_.flip_text_vertically()); + if (use_gpu_) renderer_->SetScaleFactor(options_.gpu_scale_factor()); // Set the output header based on the input header (if present). const char* input_tag = use_gpu_ ? kInputFrameTagGpu : kInputFrameTag; @@ -389,8 +392,8 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glBindTexture(GL_TEXTURE_2D, image_mat_tex_); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width_, height_, GL_RGB, - GL_UNSIGNED_BYTE, overlay_image); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width_canvas_, height_canvas_, + GL_RGB, GL_UNSIGNED_BYTE, overlay_image); glBindTexture(GL_TEXTURE_2D, 0); } @@ -492,12 +495,13 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); if (format != mediapipe::ImageFormat::SRGBA && format != mediapipe::ImageFormat::SRGB) RET_CHECK_FAIL() << "Unsupported GPU input format: " << format; - image_mat = absl::make_unique(height_, width_, CV_8UC3); + image_mat = + absl::make_unique(height_canvas_, width_canvas_, CV_8UC3); memset(image_mat->data, kAnnotationBackgroundColor, - height_ * width_ * image_mat->elemSize()); + height_canvas_ * width_canvas_ * image_mat->elemSize()); } else { image_mat = absl::make_unique( - options_.canvas_height_px(), options_.canvas_width_px(), CV_8UC3, + height_canvas_, width_canvas_, CV_8UC3, cv::Scalar(options_.canvas_color().r(), options_.canvas_color().g(), options_.canvas_color().b())); } @@ -632,19 +636,29 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); kAnnotationBackgroundColor / 255.0, kAnnotationBackgroundColor / 255.0); - // Init texture for opencv rendered frame. - const auto& input_frame = - cc->Inputs().Tag(kInputFrameTagGpu).Get(); // Ensure GPU texture is divisible by 4. See b/138751944 for more info. - width_ = - RoundUp(input_frame.width(), ImageFrame::kGlDefaultAlignmentBoundary); - height_ = - RoundUp(input_frame.height(), ImageFrame::kGlDefaultAlignmentBoundary); + const float alignment = ImageFrame::kGlDefaultAlignmentBoundary; + const float scale_factor = options_.gpu_scale_factor(); + if (image_frame_available_) { + const auto& input_frame = + cc->Inputs().Tag(kInputFrameTagGpu).Get(); + width_ = RoundUp(input_frame.width(), alignment); + height_ = RoundUp(input_frame.height(), alignment); + } else { + width_ = RoundUp(options_.canvas_width_px(), alignment); + height_ = RoundUp(options_.canvas_height_px(), alignment); + } + width_canvas_ = RoundUp(width_ * scale_factor, alignment); + height_canvas_ = RoundUp(height_ * scale_factor, alignment); + + // Init texture for opencv rendered frame. { glGenTextures(1, &image_mat_tex_); glBindTexture(GL_TEXTURE_2D, image_mat_tex_); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, width_, height_, 0, GL_RGB, - GL_UNSIGNED_BYTE, nullptr); + // TODO + // OpenCV only renders to RGB images, not RGBA. Ideally this should be RGBA. 
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, width_canvas_, height_canvas_, 0, + GL_RGB, GL_UNSIGNED_BYTE, nullptr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.proto b/mediapipe/calculators/util/annotation_overlay_calculator.proto index 4391a1f2a..339bb2183 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.proto +++ b/mediapipe/calculators/util/annotation_overlay_calculator.proto @@ -45,4 +45,10 @@ message AnnotationOverlayCalculatorOptions { // origin. (Historically, OpenGL uses bottom left origin, but most MediaPipe // examples expect textures to have top-left origin.) optional bool gpu_uses_top_left_origin = 6 [default = true]; + + // Scale factor for intermediate image for GPU rendering. + // This can be used to speed up annotation by drawing the annotation on an + // intermediate image with a reduced scale, e.g. 0.5 (of the input image width + // and height), before resizing and overlaying it on top of the input image. + optional float gpu_scale_factor = 7 [default = 1.0]; } diff --git a/mediapipe/calculators/util/detections_to_render_data_calculator.cc b/mediapipe/calculators/util/detections_to_render_data_calculator.cc index 731994d4f..5082cd363 100644 --- a/mediapipe/calculators/util/detections_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/detections_to_render_data_calculator.cc @@ -235,9 +235,10 @@ void DetectionsToRenderDataCalculator::AddLabels( const Detection& detection, const DetectionsToRenderDataCalculatorOptions& options, float text_line_height, RenderData* render_data) { - CHECK(detection.label().empty() || detection.label_id().empty()) - << "Either std::string or integer labels must be used for detection " - "but not both at the same time."; + CHECK(detection.label().empty() || detection.label_id().empty() || + detection.label_size() == detection.label_id_size()) + << "String or integer labels should be of same size. Or only one of them " + "is present."; const auto num_labels = std::max(detection.label_size(), detection.label_id_size()); CHECK_EQ(detection.score_size(), num_labels) diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index d171f8af1..57a500cc5 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -316,6 +316,8 @@ cc_library( "//mediapipe/util/tracking", "//mediapipe/util/tracking:box_tracker", "//mediapipe/util/tracking:tracking_visualization_utilities", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/strings", ], alwayslink = 1, diff --git a/mediapipe/calculators/video/box_tracker_calculator.cc b/mediapipe/calculators/video/box_tracker_calculator.cc index 9963d864a..a56392ee3 100644 --- a/mediapipe/calculators/video/box_tracker_calculator.cc +++ b/mediapipe/calculators/video/box_tracker_calculator.cc @@ -18,6 +18,8 @@ #include #include +#include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_set.h" #include "absl/strings/numbers.h" #include "mediapipe/calculators/video/box_tracker_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" @@ -193,12 +195,12 @@ class BoxTrackerCalculator : public CalculatorBase { TimedBoxProtoList initial_pos_; // Keeps tracks boxes that have already been initialized. 
-  std::unordered_set<int> initialized_ids_;
+  absl::node_hash_set<int> initialized_ids_;
 
   // Non empty for batch mode tracking.
   std::string cache_dir_;
 
   // Ids to be tracked in batch_mode.
-  std::unordered_set<int> batch_track_ids_;
+  absl::node_hash_set<int> batch_track_ids_;
 
   int frame_num_ = 0;
@@ -237,6 +239,11 @@ class BoxTrackerCalculator : public CalculatorBase {
   // Queued track time requests.
   std::vector queued_track_requests_;
 
+  // Stores the tracked ids that have been discarded actively, from continuous
+  // tracking data. It may accumulate across multiple frames. Once consumed, it
+  // should be cleared immediately.
+  absl::flat_hash_set<int> actively_discarded_tracked_ids_;
+
   // Add smooth transition between re-acquisition and previous tracked boxes.
   // `result_box` is the tracking result of one specific timestamp. The smoothed
   // result will be updated in place.
@@ -1143,9 +1150,16 @@ void BoxTrackerCalculator::StreamTrack(const TrackingData& data,
   CHECK(box_map);
   CHECK(failed_ids);
 
+  // Cache the actively discarded tracked ids from the new tracking data.
+  for (const int discarded_id :
+       data.motion_data().actively_discarded_tracked_ids()) {
+    actively_discarded_tracked_ids_.insert(discarded_id);
+  }
+
   // Track all existing boxes by one frame.
   MotionVectorFrame mvf;  // Holds motion from current to previous frame.
   MotionVectorFrameFromTrackingData(data, &mvf);
+  mvf.actively_discarded_tracked_ids = &actively_discarded_tracked_ids_;
   if (forward) {
     MotionVectorFrame mvf_inverted;
diff --git a/mediapipe/examples/android/README.md b/mediapipe/examples/android/README.md
index 8ce927727..bc32c24da 100644
--- a/mediapipe/examples/android/README.md
+++ b/mediapipe/examples/android/README.md
@@ -1 +1 @@
-This directory contains MediaPipe example applications for Android. Please see [Solutions](https://solutions.mediapipe.dev)for details.
+This directory contains MediaPipe example applications for Android. Please see [Solutions](https://solutions.mediapipe.dev) for details.
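For context on the BoxTrackerCalculator change above: the new `actively_discarded_tracked_ids_` member accumulates discarded ids from incoming `TrackingData` across frames and is meant to be consumed and then cleared. The standalone sketch below illustrates that accumulate-then-consume pattern with `absl::flat_hash_set`; it is not part of the patch, and the `FrameMotionData`/`DiscardedIdCache` names are invented for illustration only.

```c++
#include <iostream>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_set.h"

// Hypothetical stand-in for per-frame tracking data; in the patch these ids
// come from TrackingData::motion_data().actively_discarded_tracked_ids().
struct FrameMotionData {
  std::vector<int> actively_discarded_tracked_ids;
};

class DiscardedIdCache {
 public:
  // Accumulate ids across frames, mirroring the loop added in StreamTrack().
  void Accumulate(const FrameMotionData& data) {
    for (int id : data.actively_discarded_tracked_ids) {
      discarded_ids_.insert(id);
    }
  }

  // Hand the cached ids to the consumer (e.g., to drop their boxes) and clear
  // the cache immediately, as the new member comment prescribes.
  absl::flat_hash_set<int> Consume() {
    absl::flat_hash_set<int> out = std::move(discarded_ids_);
    discarded_ids_.clear();
    return out;
  }

 private:
  absl::flat_hash_set<int> discarded_ids_;
};

int main() {
  DiscardedIdCache cache;
  cache.Accumulate({{3, 7}});   // frame 1
  cache.Accumulate({{7, 9}});   // frame 2; duplicate 7 is deduplicated
  for (int id : cache.Consume()) std::cout << "discard box id " << id << "\n";
  return 0;
}
```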
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java index 065e88f07..82c1f4478 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java @@ -43,19 +43,23 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES)); processor.setInputSidePackets(inputSidePackets); - processor.addPacketCallback( + // To show verbose logging, run: + // adb shell setprop log.tag.MainActivity VERBOSE + if (Log.isLoggable(TAG, Log.VERBOSE)) { + processor.addPacketCallback( OUTPUT_LANDMARKS_STREAM_NAME, (packet) -> { - Log.d(TAG, "Received multi face landmarks packet."); + Log.v(TAG, "Received multi face landmarks packet."); List multiFaceLandmarks = PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser()); - Log.d( + Log.v( TAG, "[TS:" + packet.getTimestamp() + "] " + getMultiFaceLandmarksDebugString(multiFaceLandmarks)); }); + } } private static String getMultiFaceLandmarksDebugString( diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java index 7305c9ef5..e45510c1c 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java @@ -43,29 +43,33 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { } }); - processor.addPacketCallback( + // To show verbose logging, run: + // adb shell setprop log.tag.MainActivity VERBOSE + if (Log.isLoggable(TAG, Log.VERBOSE)) { + processor.addPacketCallback( OUTPUT_LANDMARKS_STREAM_NAME, (packet) -> { byte[] landmarksRaw = PacketGetter.getProtoBytes(packet); try { NormalizedLandmarkList landmarks = NormalizedLandmarkList.parseFrom(landmarksRaw); if (landmarks == null) { - Log.d(TAG, "[TS:" + packet.getTimestamp() + "] No hand landmarks."); + Log.v(TAG, "[TS:" + packet.getTimestamp() + "] No hand landmarks."); return; } // Note: If hand_presence is false, these landmarks are useless. - Log.d( + Log.v( TAG, "[TS:" + packet.getTimestamp() + "] #Landmarks for hand: " + landmarks.getLandmarkCount()); - Log.d(TAG, getLandmarksDebugString(landmarks)); + Log.v(TAG, getLandmarksDebugString(landmarks)); } catch (InvalidProtocolBufferException e) { Log.e(TAG, "Couldn't Exception received - " + e); return; } }); + } } private static String getLandmarksDebugString(NormalizedLandmarkList landmarks) { diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD new file mode 100644 index 000000000..202cee82d --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD @@ -0,0 +1,62 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/iris_tracking:iris_tracking_gpu_deps", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +android_binary( + name = "iristrackinggpu", + srcs = glob(["*.java"]), + assets = [ + "//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb", + "//mediapipe/modules/face_landmark:face_landmark.tflite", + "//mediapipe/modules/iris_landmark:iris_landmark.tflite", + "//mediapipe/modules/face_detection:face_detection_front.tflite", + ], + assets_dir = "", + manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.apps.iristrackinggpu", + "appName": "Iris Tracking", + "mainActivity": ".MainActivity", + "cameraFacingFront": "True", + "binaryGraphName": "iris_tracking_gpu.binarypb", + "inputVideoStreamName": "input_video", + "outputVideoStreamName": "output_video", + "flipFramesVertically": "True", + }, + multidex = "native", + deps = [ + ":mediapipe_jni_lib", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java new file mode 100644 index 000000000..a979e698f --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java @@ -0,0 +1,40 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.iristrackinggpu; + +import android.graphics.SurfaceTexture; +import com.google.mediapipe.framework.Packet; +import java.util.HashMap; +import java.util.Map; + +/** Main activity of MediaPipe iris tracking app. 
*/ +public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { + private static final String TAG = "MainActivity"; + + private static final String FOCAL_LENGTH_STREAM_NAME = "focal_length_pixel"; + + @Override + protected void onCameraStarted(SurfaceTexture surfaceTexture) { + super.onCameraStarted(surfaceTexture); + + float focalLength = cameraHelper.getFocalLengthPixels(); + if (focalLength != Float.MIN_VALUE) { + Packet focalLengthSidePacket = processor.getPacketCreator().createFloat32(focalLength); + Map inputSidePackets = new HashMap<>(); + inputSidePackets.put(FOCAL_LENGTH_STREAM_NAME, focalLengthSidePacket); + processor.setInputSidePackets(inputSidePackets); + } + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java index 4aee88768..0d4dfde7f 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java @@ -31,19 +31,23 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); - processor.addPacketCallback( + // To show verbose logging, run: + // adb shell setprop log.tag.MainActivity VERBOSE + if (Log.isLoggable(TAG, Log.VERBOSE)) { + processor.addPacketCallback( OUTPUT_LANDMARKS_STREAM_NAME, (packet) -> { - Log.d(TAG, "Received multi-hand landmarks packet."); + Log.v(TAG, "Received multi-hand landmarks packet."); List multiHandLandmarks = PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser()); - Log.d( + Log.v( TAG, "[TS:" + packet.getTimestamp() + "] " + getMultiHandLandmarksDebugString(multiHandLandmarks)); }); + } } private String getMultiHandLandmarksDebugString(List multiHandLandmarks) { diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc index ee403a5d0..cba751057 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc @@ -165,28 +165,53 @@ REGISTER_CALCULATOR(ContentZoomingCalculator); } namespace { -::mediapipe::Status UpdateRanges(const SalientRegion& region, float* xmin, +mediapipe::LocationData::RelativeBoundingBox ShiftDetection( + const mediapipe::LocationData::RelativeBoundingBox& relative_bounding_box, + const float y_offset_percent, const float x_offset_percent) { + auto shifted_bb = relative_bounding_box; + shifted_bb.set_ymin(relative_bounding_box.ymin() + + relative_bounding_box.height() * y_offset_percent); + shifted_bb.set_xmin(relative_bounding_box.xmin() + + relative_bounding_box.width() * x_offset_percent); + return shifted_bb; +} +mediapipe::autoflip::RectF ShiftDetection( + const mediapipe::autoflip::RectF& relative_bounding_box, + const float y_offset_percent, const float x_offset_percent) { + auto shifted_bb = relative_bounding_box; + shifted_bb.set_y(relative_bounding_box.y() + + relative_bounding_box.height() * y_offset_percent); + shifted_bb.set_x(relative_bounding_box.x() + + relative_bounding_box.width() * x_offset_percent); + return shifted_bb; +} +::mediapipe::Status UpdateRanges(const SalientRegion& region, + const float shift_vertical, + 
const float shift_horizontal, float* xmin, float* xmax, float* ymin, float* ymax) { if (!region.has_location_normalized()) { return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) << "SalientRegion did not have location normalized set."; } - *xmin = fmin(*xmin, region.location_normalized().x()); - *xmax = fmax(*xmax, region.location_normalized().x() + - region.location_normalized().width()); - *ymin = fmin(*ymin, region.location_normalized().y()); - *ymax = fmax(*ymax, region.location_normalized().y() + - region.location_normalized().height()); + auto location = ShiftDetection(region.location_normalized(), shift_vertical, + shift_horizontal); + *xmin = fmin(*xmin, location.x()); + *xmax = fmax(*xmax, location.x() + location.width()); + *ymin = fmin(*ymin, location.y()); + *ymax = fmax(*ymax, location.y() + location.height()); return ::mediapipe::OkStatus(); } ::mediapipe::Status UpdateRanges(const mediapipe::Detection& detection, - float* xmin, float* xmax, float* ymin, - float* ymax) { + const float shift_vertical, + const float shift_horizontal, float* xmin, + float* xmax, float* ymin, float* ymax) { RET_CHECK(detection.location_data().format() == mediapipe::LocationData::RELATIVE_BOUNDING_BOX) << "Face detection input is lacking required relative_bounding_box()"; - const auto& location = detection.location_data().relative_bounding_box(); + const auto& location = + ShiftDetection(detection.location_data().relative_bounding_box(), + shift_vertical, shift_horizontal); *xmin = fmin(*xmin, location.xmin()); *xmax = fmax(*xmax, location.xmin() + location.width()); *ymin = fmin(*ymin, location.ymin()); @@ -270,7 +295,9 @@ void MakeStaticFeatures(const int top_border, const int bottom_border, continue; } only_required_found = true; - MP_RETURN_IF_ERROR(UpdateRanges(region, &xmin, &xmax, &ymin, &ymax)); + MP_RETURN_IF_ERROR(UpdateRanges( + region, options_.detection_shift_vertical(), + options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax)); } } @@ -279,7 +306,9 @@ void MakeStaticFeatures(const int top_border, const int bottom_border, cc->Inputs().Tag(kDetections).Get>(); for (const auto& detection : raw_detections) { only_required_found = true; - MP_RETURN_IF_ERROR(UpdateRanges(detection, &xmin, &xmax, &ymin, &ymax)); + MP_RETURN_IF_ERROR(UpdateRanges( + detection, options_.detection_shift_vertical(), + options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax)); } } @@ -324,6 +353,19 @@ void MakeStaticFeatures(const int top_border, const int bottom_border, int path_offset_y; MP_RETURN_IF_ERROR(path_solver_offset_->GetState(&path_offset_y)); + // Prevent box from extending beyond the image after camera smoothing. + if (path_offset_y - ceil(path_height / 2.0) < 0) { + path_offset_y = ceil(path_height / 2.0); + } else if (path_offset_y + ceil(path_height / 2.0) > frame_height_) { + path_offset_y = frame_height_ - ceil(path_height / 2.0); + } + int path_width = path_height * target_aspect_; + if (path_offset_x - ceil(path_width / 2.0) < 0) { + path_offset_x = ceil(path_width / 2.0); + } else if (path_offset_x + ceil(path_width / 2.0) > frame_width_) { + path_offset_x = frame_width_ - ceil(path_width / 2.0); + } + // Convert to top/bottom borders to remove. 
int path_top = path_offset_y - path_height / 2; int path_bottom = frame_height_ - (path_offset_y + path_height / 2); diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto index bf0b8201b..2634a4afe 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto @@ -19,6 +19,7 @@ package mediapipe.autoflip; import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto"; import "mediapipe/framework/calculator.proto"; +// NextTag: 13 message ContentZoomingCalculatorOptions { extend mediapipe.CalculatorOptions { optional ContentZoomingCalculatorOptions ext = 313091992; @@ -44,6 +45,12 @@ message ContentZoomingCalculatorOptions { optional int64 height = 2; } optional Size target_size = 8; + // Amount to shift an input detection as a ratio of the size (positive: + // down/right, negative: up/left). Use a negative value to increase padding + // above/left of an object, positive to increase padding below/right of an + // object. + optional float detection_shift_vertical = 11 [default = 0.0]; + optional float detection_shift_horizontal = 12 [default = 0.0]; // Deprecated parameters optional KinematicOptions kinematic_options = 2 [deprecated = true]; diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc index a37e09c57..e20ebba12 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc @@ -344,6 +344,67 @@ TEST(ContentZoomingCalculatorTest, ZoomTestPairSize) { CheckBorder(static_features, 1000, 1000, 495, 395); } +TEST(ContentZoomingCalculatorTest, ZoomTestNearOutsideBorder) { + auto runner = ::absl::make_unique( + ParseTextProtoOrDie(kConfigD)); + AddDetection(cv::Rect_(.95, .95, .05, .05), 0, runner.get()); + AddDetection(cv::Rect_(.9, .9, .1, .1), 1000000, runner.get()); + MP_ASSERT_OK(runner->Run()); + CheckCropRect(972, 972, 55, 55, 0, + runner->Outputs().Tag("CROP_RECT").packets); + CheckCropRect(958, 958, 83, 83, 1, + runner->Outputs().Tag("CROP_RECT").packets); +} + +TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) { + auto runner = ::absl::make_unique( + ParseTextProtoOrDie(kConfigD)); + AddDetection(cv::Rect_(0, 0, .05, .05), 0, runner.get()); + AddDetection(cv::Rect_(0, 0, .1, .1), 1000000, runner.get()); + MP_ASSERT_OK(runner->Run()); + CheckCropRect(28, 28, 55, 55, 0, runner->Outputs().Tag("CROP_RECT").packets); + CheckCropRect(42, 42, 83, 83, 1, runner->Outputs().Tag("CROP_RECT").packets); +} + +TEST(ContentZoomingCalculatorTest, VerticalShift) { + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->set_detection_shift_vertical(0.2); + auto runner = ::absl::make_unique(config); + AddDetection(cv::Rect_(.1, .1, .1, .1), 0, runner.get()); + MP_ASSERT_OK(runner->Run()); + // 1000px * .1 offset + 1000*.1*.1 shift = 170 + CheckCropRect(150, 170, 111, 111, 0, + runner->Outputs().Tag("CROP_RECT").packets); +} + +TEST(ContentZoomingCalculatorTest, HorizontalShift) { + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( 
+ ContentZoomingCalculatorOptions::ext); + options->set_detection_shift_horizontal(0.2); + auto runner = ::absl::make_unique(config); + AddDetection(cv::Rect_(.1, .1, .1, .1), 0, runner.get()); + MP_ASSERT_OK(runner->Run()); + // 1000px * .1 offset + 1000*.1*.1 shift = 170 + CheckCropRect(170, 150, 111, 111, 0, + runner->Outputs().Tag("CROP_RECT").packets); +} + +TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) { + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->set_detection_shift_vertical(-0.2); + options->set_detection_shift_horizontal(0.2); + auto runner = ::absl::make_unique(config); + AddDetection(cv::Rect_(.9, 0, .1, .1), 0, runner.get()); + MP_ASSERT_OK(runner->Run()); + CheckCropRect(944, 56, 111, 111, 0, + runner->Outputs().Tag("CROP_RECT").packets); +} + } // namespace } // namespace autoflip diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc index 340c4b253..3d37541cf 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.cc @@ -10,6 +10,10 @@ namespace autoflip { current_time_ = time_us; initialized_ = true; current_velocity_deg_per_s_ = 0; + RET_CHECK_GT(pixels_per_degree_, 0) + << "pixels_per_degree must be larger than 0."; + RET_CHECK_GE(options_.min_motion_to_reframe(), options_.reframe_window()) + << "Reframe window cannot exceed min_motion_to_reframe."; return ::mediapipe::OkStatus(); } @@ -22,6 +26,14 @@ namespace autoflip { if (abs(delta_degs) < options_.min_motion_to_reframe()) { position = current_position_px_; delta_degs = 0; + } else if (delta_degs > 0) { + // Apply new position, less the reframe window size. + position = position - pixels_per_degree_ * options_.reframe_window(); + delta_degs = (position - current_position_px_) / pixels_per_degree_; + } else { + // Apply new position, plus the reframe window size. + position = position + pixels_per_degree_ * options_.reframe_window(); + delta_degs = (position - current_position_px_) / pixels_per_degree_; } // Time and position updates. diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto index eda04c4b1..552ead0d9 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto @@ -10,4 +10,9 @@ message KinematicOptions { optional double max_velocity = 2 [default = 18]; // Min motion (in degrees) to react in pixels. optional float min_motion_to_reframe = 3 [default = 1.8]; + // When motion exceeds min_motion_to_reframe, move within this distance of the + // camera from the starting direction. Setting this value non-zero reduces + // total reframe distance on average. Value cannot exceed + // min_motion_to_reframe value. 
+ optional float reframe_window = 4 [default = 0]; } diff --git a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc index 5d5717589..d751bd1e3 100644 --- a/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc +++ b/mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver_test.cc @@ -27,6 +27,12 @@ namespace mediapipe { namespace autoflip { namespace { +TEST(KinematicPathSolverTest, FailZeroPixelsPerDegree) { + KinematicOptions options; + KinematicPathSolver solver(options, 0, 1000, 0); + EXPECT_FALSE(solver.AddObservation(500, kMicroSecInSec * 0).ok()); +} + TEST(KinematicPathSolverTest, FailNotInitializedState) { KinematicOptions options; KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); @@ -109,6 +115,38 @@ TEST(KinematicPathSolverTest, PassEnoughMotionSmallImg) { EXPECT_EQ(state, 410); } +TEST(KinematicPathSolverTest, FailReframeWindowSetting) { + KinematicOptions options; + // Set min motion to 1deg + options.set_min_motion_to_reframe(1.0); + options.set_update_rate(1); + options.set_max_velocity(1000); + // Set reframe window size to .75 for test. + options.set_reframe_window(1.1); + // Set degrees / pixel to 16.6 + KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); + ASSERT_FALSE(solver.AddObservation(500, kMicroSecInSec * 0).ok()); +} + +TEST(KinematicPathSolverTest, PassReframeWindow) { + KinematicOptions options; + // Set min motion to 1deg + options.set_min_motion_to_reframe(1.0); + options.set_update_rate(1); + options.set_max_velocity(1000); + // Set reframe window size to .75 for test. + options.set_reframe_window(0.75); + // Set degrees / pixel to 16.6 + KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView); + int state; + MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0)); + // Move target by 20px / 16.6 = 1.2deg + MP_ASSERT_OK(solver.AddObservation(520, kMicroSecInSec * 1)); + MP_ASSERT_OK(solver.GetState(&state)); + // Expect cam to move 1.2-.75 deg, * 16.6 = 7.47px + 500 = + EXPECT_EQ(state, 507); +} + TEST(KinematicPathSolverTest, PassUpdateRate) { KinematicOptions options; options.set_min_motion_to_reframe(1.0); diff --git a/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc b/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc index acb66ced6..420cb8146 100644 --- a/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc +++ b/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc @@ -144,11 +144,9 @@ namespace autoflip { // renderer. for (int i = 0; i < num_scene_frames; i++) { const int left = -(scene_frame_xforms[i].at(0, 2)); - const int right = left + crop_width; - const int top = top_static_border_size; - const int bottom = frame_height_ - bottom_static_border_size; - crop_from_location->push_back( - cv::Rect(left, top, right - left, bottom - top)); + const int top = + top_static_border_size - (scene_frame_xforms[i].at(1, 2)); + crop_from_location->push_back(cv::Rect(left, top, crop_width, crop_height)); } // If no cropped_frames is passed in, return directly. 
diff --git a/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc b/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc index e0e4f9d15..fb2c989b2 100644 --- a/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc +++ b/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc @@ -201,5 +201,29 @@ TEST(SceneCropperTest, CropFramesWorksWithPriorFocusPointFrames) { } } +// Checks that crop_from_locations gets the correct results. +TEST(SceneCropperTest, CropFromLocation) { + CameraMotionOptions options; + options.mutable_polynomial_path_solver()->set_prior_frame_buffer_size(30); + SceneCropper scene_cropper(options, kSceneWidth, kSceneHeight); + std::vector cropped_frames; + std::vector crop_from_locations; + const auto& scene_frames = GetDefaultSceneFrames(); + MP_EXPECT_OK(scene_cropper.CropFrames( + GetDefaultSceneKeyFrameCropSummary(), GetTimestamps(scene_frames.size()), + GetIsKeyframe(scene_frames.size()), scene_frames, + GetDefaultFocusPointFrames(), GetFocusPointFrames(3), 0, 0, false, + &crop_from_locations, &cropped_frames)); + EXPECT_EQ(cropped_frames.size(), kNumSceneFrames); + for (int i = 0; i < kNumSceneFrames; ++i) { + EXPECT_EQ(cropped_frames[i].rows, kCropHeight); + EXPECT_EQ(cropped_frames[i].cols, kCropWidth); + } + for (int i = 0; i < kNumSceneFrames; ++i) { + EXPECT_EQ(crop_from_locations[i].height, kCropHeight); + EXPECT_EQ(crop_from_locations[i].width, kCropWidth); + } +} + } // namespace autoflip } // namespace mediapipe diff --git a/mediapipe/examples/desktop/iris_tracking/BUILD b/mediapipe/examples/desktop/iris_tracking/BUILD new file mode 100644 index 000000000..430922115 --- /dev/null +++ b/mediapipe/examples/desktop/iris_tracking/BUILD @@ -0,0 +1,60 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//mediapipe/examples:__subpackages__"]) + +cc_binary( + name = "iris_depth_from_image_desktop", + srcs = ["iris_depth_from_image_desktop.cc"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:commandlineflags", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_video", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "//mediapipe/graphs/iris_tracking:iris_depth_cpu_deps", + ], +) + +cc_binary( + name = "iris_tracking_cpu_video_input", + deps = [ + "//mediapipe/examples/desktop:simple_run_graph_main", + "//mediapipe/graphs/iris_tracking:iris_tracking_cpu_video_input_deps", + ], +) + +cc_binary( + name = "iris_tracking_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps", + ], +) + +# Linux only +cc_binary( + name = "iris_tracking_gpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_gpu", + "//mediapipe/graphs/iris_tracking:iris_tracking_gpu_deps", + ], +) diff --git a/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop.cc b/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop.cc new file mode 100644 index 000000000..4cfab621d --- /dev/null +++ b/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop.cc @@ -0,0 +1,162 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A utility to extract iris depth from a single image of face using the graph +// mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt. +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/commandlineflags.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_video_inc.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" + +constexpr char kInputStream[] = "input_image_bytes"; +constexpr char kOutputImageStream[] = "output_image"; +constexpr char kLeftIrisDepthMmStream[] = "left_iris_depth_mm"; +constexpr char kRightIrisDepthMmStream[] = "right_iris_depth_mm"; +constexpr char kWindowName[] = "MediaPipe"; +constexpr char kCalculatorGraphConfigFile[] = + "mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt"; +constexpr float kMicrosPerSecond = 1e6; + +DEFINE_string(input_image_path, "", + "Full path of image to load. 
" + "If not provided, nothing will run."); +DEFINE_string(output_image_path, "", + "Full path of where to save image result (.jpg only). " + "If not provided, show result in a window."); + +namespace { + +::mediapipe::StatusOr ReadFileToString( + const std::string& file_path) { + std::string contents; + MP_RETURN_IF_ERROR(::mediapipe::file::GetContents(file_path, &contents)); + return contents; +} + +::mediapipe::Status ProcessImage( + std::unique_ptr<::mediapipe::CalculatorGraph> graph) { + LOG(INFO) << "Load the image."; + ASSIGN_OR_RETURN(const std::string raw_image, + ReadFileToString(FLAGS_input_image_path)); + + LOG(INFO) << "Start running the calculator graph."; + ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller output_image_poller, + graph->AddOutputStreamPoller(kOutputImageStream)); + ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller left_iris_depth_poller, + graph->AddOutputStreamPoller(kLeftIrisDepthMmStream)); + ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller right_iris_depth_poller, + graph->AddOutputStreamPoller(kRightIrisDepthMmStream)); + MP_RETURN_IF_ERROR(graph->StartRun({})); + + // Send image packet into the graph. + const size_t fake_timestamp_us = (double)cv::getTickCount() / + (double)cv::getTickFrequency() * + kMicrosPerSecond; + MP_RETURN_IF_ERROR(graph->AddPacketToInputStream( + kInputStream, ::mediapipe::MakePacket(raw_image).At( + ::mediapipe::Timestamp(fake_timestamp_us)))); + + // Get the graph result packets, or stop if that fails. + ::mediapipe::Packet left_iris_depth_packet; + if (!left_iris_depth_poller.Next(&left_iris_depth_packet)) { + return ::mediapipe::UnknownError( + "Failed to get packet from output stream 'left_iris_depth_mm'."); + } + const auto& left_iris_depth_mm = left_iris_depth_packet.Get(); + const int left_iris_depth_cm = std::round(left_iris_depth_mm / 10); + std::cout << "Left Iris Depth: " << left_iris_depth_cm << " cm." << std::endl; + + ::mediapipe::Packet right_iris_depth_packet; + if (!right_iris_depth_poller.Next(&right_iris_depth_packet)) { + return ::mediapipe::UnknownError( + "Failed to get packet from output stream 'right_iris_depth_mm'."); + } + const auto& right_iris_depth_mm = right_iris_depth_packet.Get(); + const int right_iris_depth_cm = std::round(right_iris_depth_mm / 10); + std::cout << "Right Iris Depth: " << right_iris_depth_cm << " cm." + << std::endl; + + ::mediapipe::Packet output_image_packet; + if (!output_image_poller.Next(&output_image_packet)) { + return ::mediapipe::UnknownError( + "Failed to get packet from output stream 'output_image'."); + } + auto& output_frame = output_image_packet.Get<::mediapipe::ImageFrame>(); + + // Convert back to opencv for display or saving. + cv::Mat output_frame_mat = ::mediapipe::formats::MatView(&output_frame); + cv::cvtColor(output_frame_mat, output_frame_mat, cv::COLOR_RGB2BGR); + const bool save_image = !FLAGS_output_image_path.empty(); + if (save_image) { + LOG(INFO) << "Saving image to file..."; + cv::imwrite(FLAGS_output_image_path, output_frame_mat); + } else { + cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1); + cv::imshow(kWindowName, output_frame_mat); + // Press any key to exit. 
+ cv::waitKey(0); + } + + LOG(INFO) << "Shutting down."; + MP_RETURN_IF_ERROR(graph->CloseInputStream(kInputStream)); + return graph->WaitUntilDone(); +} + +::mediapipe::Status RunMPPGraph() { + std::string calculator_graph_config_contents; + MP_RETURN_IF_ERROR(::mediapipe::file::GetContents( + kCalculatorGraphConfigFile, &calculator_graph_config_contents)); + LOG(INFO) << "Get calculator graph config contents: " + << calculator_graph_config_contents; + ::mediapipe::CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie<::mediapipe::CalculatorGraphConfig>( + calculator_graph_config_contents); + + LOG(INFO) << "Initialize the calculator graph."; + std::unique_ptr<::mediapipe::CalculatorGraph> graph = + absl::make_unique<::mediapipe::CalculatorGraph>(); + MP_RETURN_IF_ERROR(graph->Initialize(config)); + + const bool load_image = !FLAGS_input_image_path.empty(); + if (load_image) { + return ProcessImage(std::move(graph)); + } else { + return ::mediapipe::InvalidArgumentError("Missing image file."); + } +} + +} // namespace + +int main(int argc, char** argv) { + google::InitGoogleLogging(argv[0]); + gflags::ParseCommandLineFlags(&argc, &argv, true); + ::mediapipe::Status run_status = RunMPPGraph(); + if (!run_status.ok()) { + LOG(ERROR) << "Failed to run the graph: " << run_status.message(); + return EXIT_FAILURE; + } else { + LOG(INFO) << "Success!"; + } + return EXIT_SUCCESS; +} diff --git a/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py b/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py index a639e1056..205834cc8 100644 --- a/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py +++ b/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py @@ -30,7 +30,7 @@ SECONDS_TO_MICROSECONDS = 1000000 def bytes23(string): - """Creates a bytes string in either Python 2 or 3.""" + """Creates a bytes string in either Python 2 or 3.""" if sys.version_info >= (3, 0): return bytes(string, 'utf8') else: diff --git a/mediapipe/examples/ios/bundle_id.bzl b/mediapipe/examples/ios/bundle_id.bzl new file mode 100644 index 000000000..4866b07c6 --- /dev/null +++ b/mediapipe/examples/ios/bundle_id.bzl @@ -0,0 +1,26 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Configuration helper for iOS app bundle ids and provisioning profiles. +""" + +BUNDLE_ID_PREFIX = "*SEE_IOS_INSTRUCTIONS*.mediapipe.examples" + +# Look for a provisioning profile in the example's directory first, +# otherwise look for a common one. 
+def example_provisioning(): + local_profile = native.glob(["provisioning_profile.mobileprovision"]) + if local_profile: + return local_profile[0] + return "//mediapipe/examples/ios:provisioning_profile" diff --git a/mediapipe/examples/ios/edgedetectiongpu/BUILD b/mediapipe/examples/ios/edgedetectiongpu/BUILD index 66ea1b066..46fb32a94 100644 --- a/mediapipe/examples/ios/edgedetectiongpu/BUILD +++ b/mediapipe/examples/ios/edgedetectiongpu/BUILD @@ -12,14 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 - -MIN_IOS_VERSION = "10.0" - load( "@build_bazel_rules_apple//apple:ios.bzl", "ios_application", ) +load( + "//mediapipe/examples/ios:bundle_id.bzl", + "BUNDLE_ID_PREFIX", + "example_provisioning", +) + +licenses(["notice"]) # Apache 2.0 + +MIN_IOS_VERSION = "10.0" alias( name = "edgedetectiongpu", @@ -28,14 +33,14 @@ alias( ios_application( name = "EdgeDetectionGpuApp", - bundle_id = "com.google.mediapipe.EdgeDetectionGpu", + bundle_id = BUNDLE_ID_PREFIX + ".EdgeDetectionGpu", families = [ "iphone", "ipad", ], infoplists = ["Info.plist"], minimum_os_version = MIN_IOS_VERSION, - provisioning_profile = "//mediapipe/examples/ios:provisioning_profile", + provisioning_profile = example_provisioning(), deps = [":EdgeDetectionGpuAppLibrary"], ) diff --git a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard index e3bd912a4..20845c12f 100644 --- a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard +++ b/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard @@ -1,10 +1,8 @@ - - - - + + - + @@ -18,11 +16,11 @@ - +