diff --git a/BUILD b/BUILD.bazel similarity index 94% rename from BUILD rename to BUILD.bazel index f225f24e3..1973f98af 100644 --- a/BUILD +++ b/BUILD.bazel @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) exports_files(["LICENSE"]) diff --git a/README.md b/README.md index 0a96c42c8..cef6213dd 100644 --- a/README.md +++ b/README.md @@ -22,13 +22,13 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Hands | Hair Segmentation -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :---------------: -[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) +Face Detection | Face Mesh | Iris | Hands +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---: +[![face_detection](docs/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](docs/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](docs/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](docs/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) -Object Detection | Box Tracking | Objectron | KNIFT -:----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT 
+:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: +[![hair_segmentation](docs/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](docs/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](docs/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](docs/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](docs/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) @@ -37,6 +37,7 @@ Object Detection :---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | ✅ | [Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | ✅ | [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | ✅ @@ -63,6 +64,8 @@ never leaves your device. ![visualizer_runner](docs/images/visualizer_runner.png) * [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection) +* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking) +* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth) * [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking) * [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection) * [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation) @@ -83,6 +86,8 @@ run code search using ## Publications +* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation from a Single + Image](https://mediapipe.page.link/iris-blog) in Google AI Blog * [MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html) in Google Developers Blog * [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html) diff --git a/docs/framework_concepts/calculators.md b/docs/framework_concepts/calculators.md index 116f17d3b..6c7657bfd 100644 --- a/docs/framework_concepts/calculators.md +++ b/docs/framework_concepts/calculators.md @@ -405,8 +405,4 @@ packets (bottom) based on its series of input packets (top). 
| ![Graph using | : PacketClonerCalculator](../images/packet_cloner_calculator.png) : | :--------------------------------------------------------------------------: | -| *Each time it receives a packet on its TICK input stream, the | -: PacketClonerCalculator outputs the most recent packet from each of its input : -: streams. The sequence of output packets (bottom) is determined by the : -: sequence of input packets (top) and their timestamps. The timestamps are : -: shown along the right side of the diagram.* : +| *Each time it receives a packet on its TICK input stream, the PacketClonerCalculator outputs the most recent packet from each of its input streams. The sequence of output packets (bottom) is determined by the sequence of input packets (top) and their timestamps. The timestamps are shown along the right side of the diagram.* | diff --git a/docs/getting_started/building_examples.md b/docs/getting_started/building_examples.md index 089a1fefe..be50f9bc2 100644 --- a/docs/getting_started/building_examples.md +++ b/docs/getting_started/building_examples.md @@ -280,16 +280,16 @@ are two options: 2. In the project navigator in the left sidebar, select the "Mediapipe" project. -3. Select the "Signing & Capabilities" tab. +3. Select one of the application targets, e.g. HandTrackingGpuApp. -4. Select one of the application targets, e.g. HandTrackingGpuApp. +4. Select the "Signing & Capabilities" tab. 5. Check "Automatically manage signing", and confirm the dialog box. 6. Select "_Your Name_ (Personal Team)" in the Team pop-up menu. 7. This set-up needs to be done once for each application you want to install. - Repeat steps 4-6 as needed. + Repeat steps 3-6 as needed. This generates provisioning profiles for each app you have selected. Now we need to tell Bazel to use them. We have provided a script to make this easier. @@ -390,9 +390,6 @@ developer (yourself) is trusted. bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu ``` - This will open up your webcam as long as it is connected and on. Any errors - is likely due to your webcam being not accessible. - 2. To run the application: ```bash @@ -400,6 +397,9 @@ developer (yourself) is trusted. --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt ``` + This will open up your webcam as long as it is connected and on. Any errors + is likely due to your webcam being not accessible. + ### Option 2: Running on GPU Note: This currently works only on Linux, and please first follow @@ -412,13 +412,13 @@ Note: This currently works only on Linux, and please first follow mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu ``` - This will open up your webcam as long as it is connected and on. Any errors - is likely due to your webcam being not accessible, or GPU drivers not setup - properly. - 2. To run the application: ```bash GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt ``` + + This will open up your webcam as long as it is connected and on. Any errors + is likely due to your webcam being not accessible, or GPU drivers not setup + properly. 
diff --git a/docs/images/mobile/iris_tracking_android_gpu.gif b/docs/images/mobile/iris_tracking_android_gpu.gif new file mode 100644 index 000000000..6214d9e5c Binary files /dev/null and b/docs/images/mobile/iris_tracking_android_gpu.gif differ diff --git a/docs/images/mobile/iris_tracking_android_gpu_small.gif b/docs/images/mobile/iris_tracking_android_gpu_small.gif new file mode 100644 index 000000000..050355476 Binary files /dev/null and b/docs/images/mobile/iris_tracking_android_gpu_small.gif differ diff --git a/docs/images/mobile/iris_tracking_depth_from_iris.gif b/docs/images/mobile/iris_tracking_depth_from_iris.gif new file mode 100644 index 000000000..2bcc80ea2 Binary files /dev/null and b/docs/images/mobile/iris_tracking_depth_from_iris.gif differ diff --git a/docs/images/mobile/iris_tracking_example.gif b/docs/images/mobile/iris_tracking_example.gif new file mode 100644 index 000000000..7988f3e95 Binary files /dev/null and b/docs/images/mobile/iris_tracking_example.gif differ diff --git a/docs/images/mobile/iris_tracking_eye_and_iris_landmarks.png b/docs/images/mobile/iris_tracking_eye_and_iris_landmarks.png new file mode 100644 index 000000000..1afb56395 Binary files /dev/null and b/docs/images/mobile/iris_tracking_eye_and_iris_landmarks.png differ diff --git a/docs/index.md b/docs/index.md index ea6c2feb3..bd27df416 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,13 +22,13 @@ desktop/cloud, web and IoT devices. ## ML solutions in MediaPipe -Face Detection | Face Mesh | Hands | Hair Segmentation -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :---------------: -[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) +Face Detection | Face Mesh | Iris | Hands +:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :---: +[![face_detection](images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) -Object Detection | Box Tracking | Objectron | KNIFT -:----------------------------------------------------------------------------------------------------------------------------------: | 
:-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +Hair Segmentation | Object Detection | Box Tracking | Objectron | KNIFT +:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: +[![hair_segmentation](images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![objectron](images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) @@ -37,6 +37,7 @@ Object Detection :---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | ✅ | [Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | ✅ | [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | ✅ @@ -63,6 +64,8 @@ never leaves your device. 
![visualizer_runner](images/visualizer_runner.png) * [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection) +* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking) +* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth) * [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking) * [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection) * [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation) @@ -83,6 +86,8 @@ run code search using ## Publications +* [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation from a Single + Image](https://mediapipe.page.link/iris-blog) in Google AI Blog * [MediaPipe KNIFT: Template-based feature matching](https://developers.googleblog.com/2020/04/mediapipe-knift-template-based-feature-matching.html) in Google Developers Blog * [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html) diff --git a/docs/solutions/autoflip.md b/docs/solutions/autoflip.md index f78b4ae95..faad99e92 100644 --- a/docs/solutions/autoflip.md +++ b/docs/solutions/autoflip.md @@ -2,7 +2,7 @@ layout: default title: AutoFlip (Saliency-aware Video Cropping) parent: Solutions -nav_order: 9 +nav_order: 10 --- # AutoFlip: Saliency-aware Video Cropping diff --git a/docs/solutions/box_tracking.md b/docs/solutions/box_tracking.md index 007376168..5c73a97fb 100644 --- a/docs/solutions/box_tracking.md +++ b/docs/solutions/box_tracking.md @@ -2,7 +2,7 @@ layout: default title: Box Tracking parent: Solutions -nav_order: 6 +nav_order: 7 --- # MediaPipe Box Tracking diff --git a/docs/solutions/face_mesh.md b/docs/solutions/face_mesh.md index 9026eac8d..c678901a7 100644 --- a/docs/solutions/face_mesh.md +++ b/docs/solutions/face_mesh.md @@ -153,8 +153,8 @@ it, in the graph file modify the option of `ConstantSidePacketCalculator`. 
  [Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs](https://arxiv.org/abs/1907.06724)
  ([poster](https://docs.google.com/presentation/d/1-LWwOMO9TzEVdrZ1CS1ndJzciRHfYDJfbSxH_ke_JRg/present?slide=id.g5986dd4b4c_4_212))
* Face detection model:
-  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite)
-* Face landmark mode:
-  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_landmark.tflite),
+  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite)
+* Face landmark model:
+  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
   [TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
* [Model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
diff --git a/docs/solutions/hair_segmentation.md b/docs/solutions/hair_segmentation.md
index 94cabce24..0dec46951 100644
--- a/docs/solutions/hair_segmentation.md
+++ b/docs/solutions/hair_segmentation.md
@@ -2,7 +2,7 @@
 layout: default
 title: Hair Segmentation
 parent: Solutions
-nav_order: 4
+nav_order: 5
 ---

 # MediaPipe Hair Segmentation
diff --git a/docs/solutions/hands.md b/docs/solutions/hands.md
index f4d93d840..8edfd5850 100644
--- a/docs/solutions/hands.md
+++ b/docs/solutions/hands.md
@@ -2,7 +2,7 @@
 layout: default
 title: Hands
 parent: Solutions
-nav_order: 3
+nav_order: 4
 ---

 # MediaPipe Hands
diff --git a/docs/solutions/iris.md b/docs/solutions/iris.md
new file mode 100644
index 000000000..eb2ecdd94
--- /dev/null
+++ b/docs/solutions/iris.md
@@ -0,0 +1,204 @@
+---
+layout: default
+title: Iris
+parent: Solutions
+nav_order: 3
+---
+
+# MediaPipe Iris
+{: .no_toc }
+
+1. TOC
+{:toc}
+---
+
+## Overview
+
+A wide range of real-world applications, including computational photography
+(glint reflection) and augmented reality effects (virtual avatars), rely on
+accurately tracking the iris within an eye. This is a challenging task to solve
+on mobile devices, due to the limited computing resources, variable light
+conditions and the presence of occlusions, such as hair or people squinting.
+Iris tracking can also be utilized to determine the metric distance of the
+camera to the user. This can improve a variety of use cases, ranging from
+virtual try-on of properly sized glasses and hats to accessibility features that
+adapt the font size depending on the viewer’s distance. Often, sophisticated
+specialized hardware is employed to compute the metric distance, limiting the
+range of devices on which the solution could be applied.
+
+MediaPipe Iris is an ML solution for accurate iris estimation, able to track
+landmarks involving the iris, pupil and the eye contours using a single RGB
+camera, in real-time, without the need for specialized hardware. Through use of
+iris landmarks, the solution is also able to determine the metric distance
+between the subject and the camera with relative error less than 10%. Note that
+iris tracking does not infer the location at which people are looking, nor does
+it provide any form of identity recognition. With the cross-platform capability
+of the MediaPipe framework, MediaPipe Iris can run on most modern
+[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
+[web](#web).
+
+![iris_tracking_example.gif](../images/mobile/iris_tracking_example.gif) |
+:------------------------------------------------------------------------: |
+*Fig 1. Example of MediaPipe Iris: eyelid (red) and iris (blue) contours.* |
+
+## ML Pipeline
+
+The first step in the pipeline leverages [MediaPipe Face Mesh](./face_mesh.md),
+which generates a mesh of the approximate face geometry. From this mesh, we
+isolate the eye region in the original image for use in the subsequent iris
+tracking step.
+
+The pipeline is implemented as a MediaPipe
+[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt)
+that uses a
+[face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
+from the
+[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark),
+an
+[iris landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_gpu.pbtxt)
+from the
+[iris landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark),
+and renders using a dedicated
+[iris-and-depth renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/subgraphs/iris_and_depth_renderer_gpu.pbtxt).
+The
+[face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
+internally uses a
+[face detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)
+from the
+[face detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection).
+
+Note: To visualize a graph, copy the graph and paste it into
+[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
+to visualize its associated subgraphs, please see
+[visualizer documentation](../tools/visualizer.md).
+
+## Models
+
+### Face Detection Model
+
+The face detector is the same [BlazeFace](https://arxiv.org/abs/1907.05047)
+model used in [MediaPipe Face Detection](./face_detection.md).
+
+### Face Landmark Model
+
+The face landmark model is the same as in [MediaPipe Face Mesh](./face_mesh.md).
+You can also find more details in this
+[paper](https://arxiv.org/abs/1907.06724).
+
+### Iris Landmark Model
+
+The iris model takes an image patch of the eye region and estimates both the eye
+landmarks (along the eyelid) and iris landmarks (along the iris contour). You
+can find more details in this [paper](https://arxiv.org/abs/2006.11341).
+
+![iris_tracking_eye_and_iris_landmarks.png](../images/mobile/iris_tracking_eye_and_iris_landmarks.png) |
+:----------------------------------------------------------------------------------------------------: |
+*Fig 2. Eye landmarks (red) and iris landmarks (green).* |
+
+## Depth-from-Iris
+
+MediaPipe Iris is able to determine the metric distance of a subject to the
+camera with less than 10% error, without requiring any specialized hardware.
+This is done by relying on the fact that the horizontal iris diameter of the
+human eye remains roughly constant at 11.7±0.5 mm across a wide population,
+along with some simple geometric arguments. For more details, please refer to
+our [Google AI Blog post](https://mediapipe.page.link/iris-blog).
+
+![iris_tracking_depth_from_iris.gif](../images/mobile/iris_tracking_depth_from_iris.gif) |
+:--------------------------------------------------------------------------------------------: |
+*Fig 3. (Left) MediaPipe Iris predicting metric distance in cm on a Pixel 2 from iris tracking without use of a depth sensor. (Right) Ground-truth depth.* |
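+
+To make the geometric argument above concrete, the snippet below is a minimal
+illustrative sketch of the pinhole-camera relation that depth-from-iris relies
+on. It is not the solution's actual implementation, and the function and
+constant names are made up for this example; the focal length in pixels can be
+obtained, for instance, from the image's EXIF metadata.
+
+```cpp
+// Estimate the metric distance to a subject from the observed iris size,
+// assuming a pinhole camera and a roughly constant human iris diameter.
+// focal_length_px:  camera focal length expressed in pixels.
+// iris_diameter_px: horizontal iris diameter measured in the image, in pixels.
+// Returns the estimated subject distance in millimeters.
+double EstimateDepthMm(double focal_length_px, double iris_diameter_px) {
+  constexpr double kIrisDiameterMm = 11.7;  // Roughly constant across people.
+  return focal_length_px * kIrisDiameterMm / iris_diameter_px;
+}
+```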
+
+## Example Apps
+
+Please first see general instructions for
+[Android](../getting_started/building_examples.md#android),
+[iOS](../getting_started/building_examples.md#ios) and
+[desktop](../getting_started/building_examples.md#desktop) on how to build
+MediaPipe examples.
+
+Note: To visualize a graph, copy the graph and paste it into
+[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
+to visualize its associated subgraphs, please see
+[visualizer documentation](../tools/visualizer.md).
+
+### Mobile
+
+*   Graph:
+    [`mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt)
+*   Android target:
+    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1cywcNtqk764TlZf1lvSTV4F3NGB2aL1R/view?usp=sharing)
+    [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu:iristrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD)
+*   iOS target:
+    [`mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/iristrackinggpu/BUILD)
+
+### Desktop
+
+#### Live Camera Input
+
+Please first see general instructions for
+[desktop](../getting_started/building_examples.md#desktop) on how to build
+MediaPipe examples.
+
+*   Running on CPU
+    *   Graph:
+        [`mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_cpu.pbtxt)
+    *   Target:
+        [`mediapipe/examples/desktop/iris_tracking:iris_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/iris_tracking/BUILD)
+*   Running on GPU
+    *   Graph:
+        [`mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/iris_tracking/iris_tracking_gpu.pbtxt)
+    *   Target:
+        [`mediapipe/examples/desktop/iris_tracking:iris_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/iris_tracking/BUILD)
+
+#### Video File Input
+
+1.  To build the application, run:
+
+    ```bash
+    bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/iris_tracking:iris_tracking_cpu_video_input
+    ```
+
+2.  To run the application, replace `<input video path>` and
+    `<output video path>` in the command below with your own paths:
+
+    ```
+    bazel-bin/mediapipe/examples/desktop/iris_tracking/iris_tracking_cpu_video_input \
+      --calculator_graph_config_file=mediapipe/graphs/iris_tracking/iris_tracking_cpu_video_input.pbtxt \
+      --input_side_packets=input_video_path=<input video path>,output_video_path=<output video path>
+    ```
+
+#### Single-image Depth Estimation
+
+1.  To build the application, run:
+
+    ```bash
+    bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/iris_tracking:iris_depth_from_image_desktop
+    ```
+
+2.  To run the application, replace `<input image path>` and
+    `<output image path>` in the command below with your own paths:
+
+    ```bash
+    GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop \
+      --input_image_path=<input image path> --output_image_path=<output image path>
+    ```
+
+### Web
+
+Please refer to [these instructions](../index.md#mediapipe-on-the-web).
+ +## Resources + +* Google AI Blog: [MediaPipe Iris: Real-time Eye Tracking and Depth Estimation + from a Single Image](https://mediapipe.page.link/iris-blog) +* Paper: + [Real-time Pupil Tracking from Monocular Video for Digital Puppetry](https://arxiv.org/abs/2006.11341) + ([presentation](https://youtu.be/cIhXkiiapQI)) +* Face detection model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite) +* Face landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite), + [TF.js model](https://tfhub.dev/mediapipe/facemesh/1) +* Iris landmark model: + [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark.tflite) +* [Model card](https://mediapipe.page.link/iris-mc) diff --git a/docs/solutions/knift.md b/docs/solutions/knift.md index 15d6f5d30..942ad255f 100644 --- a/docs/solutions/knift.md +++ b/docs/solutions/knift.md @@ -2,7 +2,7 @@ layout: default title: KNIFT (Template-based Feature Matching) parent: Solutions -nav_order: 8 +nav_order: 9 --- # MediaPipe KNIFT diff --git a/docs/solutions/media_sequence.md b/docs/solutions/media_sequence.md index bee6d8951..dc3ef63bc 100644 --- a/docs/solutions/media_sequence.md +++ b/docs/solutions/media_sequence.md @@ -2,7 +2,7 @@ layout: default title: Dataset Preparation with MediaSequence parent: Solutions -nav_order: 10 +nav_order: 11 --- # Dataset Preparation with MediaSequence diff --git a/docs/solutions/object_detection.md b/docs/solutions/object_detection.md index fb0bff2b1..340e1990a 100644 --- a/docs/solutions/object_detection.md +++ b/docs/solutions/object_detection.md @@ -2,7 +2,7 @@ layout: default title: Object Detection parent: Solutions -nav_order: 5 +nav_order: 6 --- # MediaPipe Object Detection @@ -95,8 +95,8 @@ Please first see general instructions for ``` GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \ - --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt \ - --input_side_packets=input_video_path=,output_video_path= + --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt \ + --input_side_packets=input_video_path=,output_video_path= ``` * With a TensorFlow Model @@ -131,8 +131,8 @@ Please first see general instructions for ```bash GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \ - --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tensorflow_graph.pbtxt \ - --input_side_packets=input_video_path=,output_video_path= + --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tensorflow_graph.pbtxt \ + --input_side_packets=input_video_path=,output_video_path= ``` ### Coral diff --git a/docs/solutions/objectron.md b/docs/solutions/objectron.md index f4db179e2..0239f174c 100644 --- a/docs/solutions/objectron.md +++ b/docs/solutions/objectron.md @@ -2,7 +2,7 @@ layout: default title: Objectron (3D Object Detection) parent: Solutions -nav_order: 7 +nav_order: 8 --- # MediaPipe Objectron diff --git a/docs/solutions/solutions.md b/docs/solutions/solutions.md index 73331526a..840b5ce3d 100644 --- a/docs/solutions/solutions.md +++ b/docs/solutions/solutions.md @@ -14,12 +14,13 @@ has_toc: false --- - + []() | Android | iOS | Desktop | Web | Coral 
:---------------------------------------------------------------------------- | :-----: | :-: | :-----: | :-: | :---: [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | +[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | ✅ | [Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | [Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | ✅ | [Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | ✅ diff --git a/docs/solutions/youtube_8m.md b/docs/solutions/youtube_8m.md index 5179e3aa5..ebb51dcc4 100644 --- a/docs/solutions/youtube_8m.md +++ b/docs/solutions/youtube_8m.md @@ -2,7 +2,7 @@ layout: default title: YouTube-8M Feature Extraction and Model Inference parent: Solutions -nav_order: 11 +nav_order: 12 --- # YouTube-8M Feature Extraction and Model Inference diff --git a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen index 4830f5b16..b8e8f95bf 100644 --- a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen +++ b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen @@ -10,6 +10,7 @@ "mediapipe/examples/ios/facemeshgpu/BUILD", "mediapipe/examples/ios/handdetectiongpu/BUILD", "mediapipe/examples/ios/handtrackinggpu/BUILD", + "mediapipe/examples/ios/iristrackinggpu/BUILD", "mediapipe/examples/ios/multihandtrackinggpu/BUILD", "mediapipe/examples/ios/objectdetectioncpu/BUILD", "mediapipe/examples/ios/objectdetectiongpu/BUILD" @@ -21,6 +22,7 @@ "//mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp", "//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp", "//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp", + "//mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp", "//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp", "//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp", "//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp", @@ -88,6 +90,8 @@ "mediapipe/examples/ios/handdetectiongpu/Base.lproj", "mediapipe/examples/ios/handtrackinggpu", "mediapipe/examples/ios/handtrackinggpu/Base.lproj", + "mediapipe/examples/ios/iristrackinggpu", + "mediapipe/examples/ios/iristrackinggpu/Base.lproj", "mediapipe/examples/ios/multihandtrackinggpu", "mediapipe/examples/ios/multihandtrackinggpu/Base.lproj", "mediapipe/examples/ios/objectdetectioncpu", @@ -110,6 +114,7 @@ "mediapipe/graphs/hand_tracking", "mediapipe/graphs/object_detection", "mediapipe/models", + "mediapipe/modules", "mediapipe/objc", "mediapipe/util", "mediapipe/util/android", diff --git a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf index c2c54aeeb..9f1ab5d66 100644 --- a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf +++ b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf @@ -17,6 +17,7 @@ "mediapipe/examples/ios/facemeshgpu", "mediapipe/examples/ios/handdetectiongpu", "mediapipe/examples/ios/handtrackinggpu", + "mediapipe/examples/ios/iristrackinggpu", "mediapipe/examples/ios/multihandtrackinggpu", "mediapipe/examples/ios/objectdetectioncpu", "mediapipe/examples/ios/objectdetectiongpu" diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 1d053081c..a0b22054f 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -316,6 +316,37 @@ 
cc_library( alwayslink = 1, ) +cc_library( + name = "concatenate_normalized_landmark_list_calculator", + srcs = ["concatenate_normalized_landmark_list_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":concatenate_vector_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "concatenate_normalized_landmark_list_calculator_test", + srcs = ["concatenate_normalized_landmark_list_calculator_test.cc"], + deps = [ + ":concatenate_normalized_landmark_list_calculator", + ":concatenate_vector_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/strings", + ], +) + cc_test( name = "concatenate_vector_calculator_test", srcs = ["concatenate_vector_calculator_test.cc"], diff --git a/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc new file mode 100644 index 000000000..54c3e05b9 --- /dev/null +++ b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc @@ -0,0 +1,84 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT +#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT + +#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Concatenates several NormalizedLandmarkList protos following stream index +// order. This class assumes that every input stream contains a +// NormalizedLandmarkList proto object. 
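+//
+// The example config below is illustrative only (it is not part of this
+// change, and the stream names are hypothetical):
+//
+// node {
+//   calculator: "ConcatenateNormalizedLandmarkListCalculator"
+//   input_stream: "left_eye_contour_landmarks"
+//   input_stream: "right_eye_contour_landmarks"
+//   output_stream: "concatenated_eye_landmarks"
+// }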
+class ConcatenateNormalizedLandmarkListCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    RET_CHECK(cc->Inputs().NumEntries() != 0);
+    RET_CHECK(cc->Outputs().NumEntries() == 1);
+
+    for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
+      cc->Inputs().Index(i).Set<NormalizedLandmarkList>();
+    }
+
+    cc->Outputs().Index(0).Set<NormalizedLandmarkList>();
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+    only_emit_if_all_present_ =
+        cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
+            .only_emit_if_all_present();
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override {
+    if (only_emit_if_all_present_) {
+      for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
+        if (cc->Inputs().Index(i).IsEmpty()) return ::mediapipe::OkStatus();
+      }
+    }
+
+    NormalizedLandmarkList output;
+    for (int i = 0; i < cc->Inputs().NumEntries(); ++i) {
+      if (cc->Inputs().Index(i).IsEmpty()) continue;
+      const NormalizedLandmarkList& input =
+          cc->Inputs().Index(i).Get<NormalizedLandmarkList>();
+      for (int j = 0; j < input.landmark_size(); ++j) {
+        const NormalizedLandmark& input_landmark = input.landmark(j);
+        *output.add_landmark() = input_landmark;
+      }
+    }
+    cc->Outputs().Index(0).AddPacket(
+        MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp()));
+    return ::mediapipe::OkStatus();
+  }
+
+ private:
+  bool only_emit_if_all_present_;
+};
+
+REGISTER_CALCULATOR(ConcatenateNormalizedLandmarkListCalculator);
+
+}  // namespace mediapipe
+
+// NOLINTNEXTLINE
+#endif  // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_
diff --git a/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator_test.cc b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator_test.cc
new file mode 100644
index 000000000..fd116ece7
--- /dev/null
+++ b/mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator_test.cc
@@ -0,0 +1,184 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_runner.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/status_matchers.h"  // NOLINT
+
+namespace mediapipe {
+
+constexpr float kLocationValue = 3;
+
+NormalizedLandmarkList GenerateLandmarks(int landmarks_size,
+                                         int value_multiplier) {
+  NormalizedLandmarkList landmarks;
+  for (int i = 0; i < landmarks_size; ++i) {
+    NormalizedLandmark* landmark = landmarks.add_landmark();
+    landmark->set_x(value_multiplier * kLocationValue);
+    landmark->set_y(value_multiplier * kLocationValue);
+    landmark->set_z(value_multiplier * kLocationValue);
+  }
+  return landmarks;
+}
+
+void ValidateCombinedLandmarks(
+    const std::vector<NormalizedLandmarkList>& inputs,
+    const NormalizedLandmarkList& result) {
+  int element_id = 0;
+  int expected_size = 0;
+  for (int i = 0; i < inputs.size(); ++i) {
+    const NormalizedLandmarkList& landmarks_i = inputs[i];
+    expected_size += landmarks_i.landmark_size();
+    for (int j = 0; j < landmarks_i.landmark_size(); ++j) {
+      const NormalizedLandmark& expected = landmarks_i.landmark(j);
+      const NormalizedLandmark& got = result.landmark(element_id);
+      EXPECT_FLOAT_EQ(expected.x(), got.x());
+      EXPECT_FLOAT_EQ(expected.y(), got.y());
+      EXPECT_FLOAT_EQ(expected.z(), got.z());
+      ++element_id;
+    }
+  }
+  EXPECT_EQ(expected_size, result.landmark_size());
+}
+
+void AddInputLandmarkLists(
+    const std::vector<NormalizedLandmarkList>& input_landmarks_vec,
+    int64 timestamp, CalculatorRunner* runner) {
+  for (int i = 0; i < input_landmarks_vec.size(); ++i) {
+    runner->MutableInputs()->Index(i).packets.push_back(
+        MakePacket<NormalizedLandmarkList>(input_landmarks_vec[i])
+            .At(Timestamp(timestamp)));
+  }
+}
+
+TEST(ConcatenateNormalizedLandmarkListCalculatorTest, EmptyVectorInputs) {
+  CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator",
+                          /*options_string=*/"", /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  NormalizedLandmarkList empty_list;
+  std::vector<NormalizedLandmarkList> inputs = {empty_list, empty_list,
+                                                empty_list};
+  AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(0, outputs[0].Get<NormalizedLandmarkList>().landmark_size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+}
+
+TEST(ConcatenateNormalizedLandmarkListCalculatorTest, OneTimestamp) {
+  CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator",
+                          /*options_string=*/"", /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  NormalizedLandmarkList input_0 =
+      GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0);
+  NormalizedLandmarkList input_1 =
+      GenerateLandmarks(/*landmarks_size=*/1, /*value_multiplier=*/1);
+  NormalizedLandmarkList input_2 =
+      GenerateLandmarks(/*landmarks_size=*/2, /*value_multiplier=*/2);
+  std::vector<NormalizedLandmarkList> inputs = {input_0, input_1, input_2};
+  AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+  const NormalizedLandmarkList& result =
+      outputs[0].Get<NormalizedLandmarkList>();
+  ValidateCombinedLandmarks(inputs, result);
+}
+
+TEST(ConcatenateNormalizedLandmarkListCalculatorTest,
+     TwoInputsAtTwoTimestamps) {
+  CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator",
+                          /*options_string=*/"", /*num_inputs=*/3,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  NormalizedLandmarkList input_0 =
+      GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0);
+  NormalizedLandmarkList input_1 =
+      GenerateLandmarks(/*landmarks_size=*/1, /*value_multiplier=*/1);
+  NormalizedLandmarkList input_2 =
+      GenerateLandmarks(/*landmarks_size=*/2, /*value_multiplier=*/2);
+  std::vector<NormalizedLandmarkList> inputs = {input_0, input_1, input_2};
+  { AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner); }
+  { AddInputLandmarkLists(inputs, /*timestamp=*/2, &runner); }
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(2, outputs.size());
+  {
+    EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+    const NormalizedLandmarkList& result =
+        outputs[0].Get<NormalizedLandmarkList>();
+    ValidateCombinedLandmarks(inputs, result);
+  }
+  {
+    EXPECT_EQ(Timestamp(2), outputs[1].Timestamp());
+    const NormalizedLandmarkList& result =
+        outputs[1].Get<NormalizedLandmarkList>();
+    ValidateCombinedLandmarks(inputs, result);
+  }
+}
+
+TEST(ConcatenateNormalizedLandmarkListCalculatorTest,
+     OneEmptyStreamStillOutput) {
+  CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator",
+                          /*options_string=*/"", /*num_inputs=*/2,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  NormalizedLandmarkList input_0 =
+      GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0);
+  std::vector<NormalizedLandmarkList> inputs = {input_0};
+  AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(1, outputs.size());
+  EXPECT_EQ(Timestamp(1), outputs[0].Timestamp());
+  const NormalizedLandmarkList& result =
+      outputs[0].Get<NormalizedLandmarkList>();
+  ValidateCombinedLandmarks(inputs, result);
+}
+
+TEST(ConcatenateNormalizedLandmarkListCalculatorTest, OneEmptyStreamNoOutput) {
+  CalculatorRunner runner("ConcatenateNormalizedLandmarkListCalculator",
+                          /*options_string=*/
+                          "[mediapipe.ConcatenateVectorCalculatorOptions.ext]: "
+                          "{only_emit_if_all_present: true}",
+                          /*num_inputs=*/2,
+                          /*num_outputs=*/1, /*num_side_packets=*/0);
+
+  NormalizedLandmarkList input_0 =
+      GenerateLandmarks(/*landmarks_size=*/3, /*value_multiplier=*/0);
+  std::vector<NormalizedLandmarkList> inputs = {input_0};
+  AddInputLandmarkLists(inputs, /*timestamp=*/1, &runner);
+  MP_ASSERT_OK(runner.Run());
+
+  const std::vector<Packet>& outputs = runner.Outputs().Index(0).packets;
+  EXPECT_EQ(0, outputs.size());
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD
index 7efb4a011..3cefe9439 100644
--- a/mediapipe/calculators/image/BUILD
+++ b/mediapipe/calculators/image/BUILD
@@ -630,3 +630,34 @@ cc_library(
     ],
     alwayslink = 1,
 )
+
+cc_library(
+    name = "image_file_properties_calculator",
+    srcs = ["image_file_properties_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_file_properties_cc_proto",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/framework/port:status",
+        "@easyexif",
+    ],
+    alwayslink = 1,
+)
+
+cc_test(
+    name = "image_file_properties_calculator_test",
+    srcs = ["image_file_properties_calculator_test.cc"],
+    data = ["//mediapipe/calculators/image/testdata:test_images"],
+    deps = [
+        ":image_file_properties_calculator",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image_file_properties_cc_proto", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + ], +) diff --git a/mediapipe/calculators/image/image_file_properties_calculator.cc b/mediapipe/calculators/image/image_file_properties_calculator.cc new file mode 100644 index 000000000..82af9ef8a --- /dev/null +++ b/mediapipe/calculators/image/image_file_properties_calculator.cc @@ -0,0 +1,195 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "exif.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_file_properties.pb.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +namespace { + +// 35 MM sensor has dimensions 36 mm x 24 mm, so diagonal length is +// sqrt(36^2 + 24^2). +static const double SENSOR_DIAGONAL_35MM = std::sqrt(1872.0); + +::mediapipe::StatusOr ComputeFocalLengthInPixels( + int image_width, int image_height, double focal_length_35mm, + double focal_length_mm) { + // TODO: Allow returning image file properties even when focal length + // computation is not possible. + if (image_width == 0 || image_height == 0) { + return ::mediapipe::InternalError( + "Image dimensions should be non-zero to compute focal length in " + "pixels."); + } + if (focal_length_mm == 0) { + return ::mediapipe::InternalError( + "Focal length in mm should be non-zero to compute focal length in " + "pixels."); + } + if (focal_length_35mm == 0) { + return ::mediapipe::InternalError( + "Focal length in 35 mm should be non-zero to compute focal length in " + "pixels."); + } + // Derived from + // https://en.wikipedia.org/wiki/35_mm_equivalent_focal_length#Calculation. + /// Using focal_length_35mm = focal_length_mm * SENSOR_DIAGONAL_35MM / + /// sensor_diagonal_mm, we can calculate the diagonal length of the sensor in + /// millimeters i.e. sensor_diagonal_mm. + double sensor_diagonal_mm = + SENSOR_DIAGONAL_35MM / focal_length_35mm * focal_length_mm; + // Note that for the following computations, the longer dimension is treated + // as image width and the shorter dimension is treated as image height. + int width = image_width; + int height = image_height; + if (image_height > image_width) { + width = image_height; + height = image_width; + } + double inv_aspect_ratio = (double)height / width; + // Compute sensor width. + /// Using Pythagoras theorem, sensor_width^2 + sensor_height^2 = + /// sensor_diagonal_mm^2. We can substitute sensor_width / sensor_height with + /// the aspect ratio calculated in pixels to compute the sensor width. + double sensor_width = std::sqrt((sensor_diagonal_mm * sensor_diagonal_mm) / + (1.0 + inv_aspect_ratio * inv_aspect_ratio)); + + // Compute focal length in pixels. 
+  double focal_length_pixels = width * focal_length_mm / sensor_width;
+  return focal_length_pixels;
+}
+
+::mediapipe::StatusOr<ImageFileProperties> GetImageFileProperties(
+    const std::string& image_bytes) {
+  easyexif::EXIFInfo result;
+  int code = result.parseFrom(image_bytes);
+  if (code) {
+    return ::mediapipe::InternalError("Error parsing EXIF, code: " +
+                                      std::to_string(code));
+  }
+
+  ImageFileProperties properties;
+  properties.set_image_width(result.ImageWidth);
+  properties.set_image_height(result.ImageHeight);
+  properties.set_focal_length_mm(result.FocalLength);
+  properties.set_focal_length_35mm(result.FocalLengthIn35mm);
+
+  ASSIGN_OR_RETURN(auto focal_length_pixels,
+                   ComputeFocalLengthInPixels(properties.image_width(),
+                                              properties.image_height(),
+                                              properties.focal_length_35mm(),
+                                              properties.focal_length_mm()));
+  properties.set_focal_length_pixels(focal_length_pixels);
+
+  return properties;
+}
+
+}  // namespace
+
+// Calculator to extract EXIF information from an image file. The input is
+// a std::string containing raw byte data from a file, and the output is an
+// ImageFileProperties proto object with the relevant fields filled in.
+// The input can be provided either as a stream or as an input side packet, and
+// the result can likewise be emitted as a stream or as an output side packet.
+// If an output stream is present, the calculator writes to it; otherwise it
+// writes to the output side packet.
+//
+// Example config with input and output streams:
+// node {
+//   calculator: "ImageFilePropertiesCalculator"
+//   input_stream: "image_bytes"
+//   output_stream: "image_properties"
+// }
+// Example config with input and output side packets:
+// node {
+//   calculator: "ImageFilePropertiesCalculator"
+//   input_side_packet: "image_bytes"
+//   output_side_packet: "image_properties"
+// }
+class ImageFilePropertiesCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    if (cc->Inputs().NumEntries() != 0) {
+      RET_CHECK(cc->Inputs().NumEntries() == 1);
+      cc->Inputs().Index(0).Set<std::string>();
+    } else {
+      RET_CHECK(cc->InputSidePackets().NumEntries() == 1);
+      cc->InputSidePackets().Index(0).Set<std::string>();
+    }
+    if (cc->Outputs().NumEntries() != 0) {
+      RET_CHECK(cc->Outputs().NumEntries() == 1);
+      cc->Outputs().Index(0).Set<::mediapipe::ImageFileProperties>();
+    } else {
+      RET_CHECK(cc->OutputSidePackets().NumEntries() == 1);
+      cc->OutputSidePackets().Index(0).Set<::mediapipe::ImageFileProperties>();
+    }
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+
+    if (cc->InputSidePackets().NumEntries() == 1) {
+      const std::string& image_bytes =
+          cc->InputSidePackets().Index(0).Get<std::string>();
+      ASSIGN_OR_RETURN(properties_, GetImageFileProperties(image_bytes));
+      read_properties_ = true;
+    }
+
+    if (read_properties_ && cc->OutputSidePackets().NumEntries() == 1) {
+      cc->OutputSidePackets().Index(0).Set(
+          MakePacket<ImageFileProperties>(properties_));
+    }
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override {
+    if (cc->Inputs().NumEntries() == 1) {
+      if (cc->Inputs().Index(0).IsEmpty()) {
+        return ::mediapipe::OkStatus();
+      }
+      const std::string& image_bytes = cc->Inputs().Index(0).Get<std::string>();
+      ASSIGN_OR_RETURN(properties_, GetImageFileProperties(image_bytes));
+      read_properties_ = true;
+    }
+    if (read_properties_) {
+      if (cc->Outputs().NumEntries() == 1) {
+        cc->Outputs().Index(0).AddPacket(
+            MakePacket<ImageFileProperties>(properties_)
+                .At(cc->InputTimestamp()));
+      } else {
+        cc->OutputSidePackets().Index(0).Set(
+            MakePacket<ImageFileProperties>(properties_)
+                .At(::mediapipe::Timestamp::Unset()));
+      }
+    }
+
+    return ::mediapipe::OkStatus();
+  }
+
+ private:
+  ImageFileProperties properties_;
+  bool read_properties_ = false;
+};
+REGISTER_CALCULATOR(ImageFilePropertiesCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/image/image_file_properties_calculator_test.cc b/mediapipe/calculators/image/image_file_properties_calculator_test.cc
new file mode 100644
index 000000000..954f095d6
--- /dev/null
+++ b/mediapipe/calculators/image/image_file_properties_calculator_test.cc
@@ -0,0 +1,134 @@
+// Copyright 2018 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+
+#include <string>
+#include <vector>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_runner.h"
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/formats/image_file_properties.pb.h"
+#include "mediapipe/framework/port/file_helpers.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/status_matchers.h"
+
+namespace mediapipe {
+
+namespace {
+
+constexpr char kImageFilePath[] =
+    "/mediapipe/calculators/image/testdata/"
+    "front_camera_pixel2.jpg";
+constexpr int kExpectedWidth = 2448;
+constexpr int kExpectedHeight = 3264;
+constexpr double kExpectedFocalLengthMm = 3.38;
+constexpr double kExpectedFocalLengthIn35Mm = 25;
+constexpr double kExpectedFocalLengthPixels = 2357.48;
+
+double RoundToNDecimals(double value, int n) {
+  return std::round(value * pow(10.0, n)) / pow(10.0, n);
+}
+
+TEST(ImageFilePropertiesCalculatorTest, ReadsFocalLengthFromJpegInStreams) {
+  std::string image_filepath = file::JoinPath("./", kImageFilePath);
+  std::string image_contents;
+  MP_ASSERT_OK(file::GetContents(image_filepath, &image_contents));
+
+  CalculatorGraphConfig::Node node_config =
+      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
+        calculator: "ImageFilePropertiesCalculator"
+        input_stream: "image_bytes"
+        output_stream: "properties"
+      )");
+
+  CalculatorRunner runner(node_config);
+  runner.MutableInputs()->Index(0).packets.push_back(
+      MakePacket<std::string>(image_contents).At(Timestamp(0)));
+  MP_ASSERT_OK(runner.Run());
+  const auto& outputs = runner.Outputs();
+  ASSERT_EQ(1, outputs.NumEntries());
+  const std::vector<Packet>& packets = outputs.Index(0).packets;
+  ASSERT_EQ(1, packets.size());
+  const auto& result = packets[0].Get<::mediapipe::ImageFileProperties>();
+  EXPECT_EQ(kExpectedWidth, result.image_width());
+  EXPECT_EQ(kExpectedHeight, result.image_height());
+  EXPECT_DOUBLE_EQ(kExpectedFocalLengthMm, result.focal_length_mm());
+  EXPECT_DOUBLE_EQ(kExpectedFocalLengthIn35Mm, result.focal_length_35mm());
+  EXPECT_DOUBLE_EQ(kExpectedFocalLengthPixels,
+                   RoundToNDecimals(result.focal_length_pixels(), /*n=*/2));
+}
+
+TEST(ImageFilePropertiesCalculatorTest, ReadsFocalLengthFromJpegInSidePackets) {
std::string image_filepath = file::JoinPath("./", kImageFilePath); + std::string image_contents; + MP_ASSERT_OK(file::GetContents(image_filepath, &image_contents)); + + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "ImageFilePropertiesCalculator" + input_side_packet: "image_bytes" + output_side_packet: "properties" + )"); + + CalculatorRunner runner(node_config); + runner.MutableSidePackets()->Index(0) = + MakePacket(image_contents).At(Timestamp(0)); + MP_ASSERT_OK(runner.Run()); + const auto& outputs = runner.OutputSidePackets(); + EXPECT_EQ(1, outputs.NumEntries()); + const auto& packet = outputs.Index(0); + const auto& result = packet.Get<::mediapipe::ImageFileProperties>(); + EXPECT_EQ(kExpectedWidth, result.image_width()); + EXPECT_EQ(kExpectedHeight, result.image_height()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthMm, result.focal_length_mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthIn35Mm, result.focal_length_35mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthPixels, + RoundToNDecimals(result.focal_length_pixels(), /*n=*/2)); +} + +TEST(ImageFilePropertiesCalculatorTest, + ReadsFocalLengthFromJpegStreamToSidePacket) { + std::string image_filepath = file::JoinPath("./", kImageFilePath); + std::string image_contents; + MP_ASSERT_OK(file::GetContents(image_filepath, &image_contents)); + + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "ImageFilePropertiesCalculator" + input_stream: "image_bytes" + output_side_packet: "properties" + )"); + + CalculatorRunner runner(node_config); + runner.MutableInputs()->Index(0).packets.push_back( + MakePacket(image_contents).At(Timestamp(0))); + MP_ASSERT_OK(runner.Run()); + const auto& outputs = runner.OutputSidePackets(); + EXPECT_EQ(1, outputs.NumEntries()); + const auto& packet = outputs.Index(0); + const auto& result = packet.Get<::mediapipe::ImageFileProperties>(); + EXPECT_EQ(kExpectedWidth, result.image_width()); + EXPECT_EQ(kExpectedHeight, result.image_height()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthMm, result.focal_length_mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthIn35Mm, result.focal_length_35mm()); + EXPECT_DOUBLE_EQ(kExpectedFocalLengthPixels, + RoundToNDecimals(result.focal_length_pixels(), /*n=*/2)); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.cc b/mediapipe/calculators/util/annotation_overlay_calculator.cc index 8e6fe977e..e66bc1095 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.cc +++ b/mediapipe/calculators/util/annotation_overlay_calculator.cc @@ -160,8 +160,8 @@ class AnnotationOverlayCalculator : public CalculatorBase { GLuint image_mat_tex_ = 0; // Overlay drawing image for GPU. int width_ = 0; int height_ = 0; - int width_gpu_ = 0; // Size of overlay drawing texture. - int height_gpu_ = 0; + int width_canvas_ = 0; // Size of overlay drawing texture canvas. + int height_canvas_ = 0; #endif // MEDIAPIPE_DISABLE_GPU }; REGISTER_CALCULATOR(AnnotationOverlayCalculator); @@ -250,6 +250,7 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); // Initialize the helper renderer library. renderer_ = absl::make_unique(); renderer_->SetFlipTextVertically(options_.flip_text_vertically()); + if (use_gpu_) renderer_->SetScaleFactor(options_.gpu_scale_factor()); // Set the output header based on the input header (if present). const char* input_tag = use_gpu_ ? 
kInputFrameTagGpu : kInputFrameTag; @@ -391,8 +392,8 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glBindTexture(GL_TEXTURE_2D, image_mat_tex_); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width_gpu_, height_gpu_, GL_RGB, - GL_UNSIGNED_BYTE, overlay_image); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width_canvas_, height_canvas_, + GL_RGB, GL_UNSIGNED_BYTE, overlay_image); glBindTexture(GL_TEXTURE_2D, 0); } @@ -494,12 +495,13 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); if (format != mediapipe::ImageFormat::SRGBA && format != mediapipe::ImageFormat::SRGB) RET_CHECK_FAIL() << "Unsupported GPU input format: " << format; - image_mat = absl::make_unique(height_gpu_, width_gpu_, CV_8UC3); + image_mat = + absl::make_unique(height_canvas_, width_canvas_, CV_8UC3); memset(image_mat->data, kAnnotationBackgroundColor, - height_gpu_ * width_gpu_ * image_mat->elemSize()); + height_canvas_ * width_canvas_ * image_mat->elemSize()); } else { image_mat = absl::make_unique( - height_gpu_, width_gpu_, CV_8UC3, + height_canvas_, width_canvas_, CV_8UC3, cv::Scalar(options_.canvas_color().r(), options_.canvas_color().g(), options_.canvas_color().b())); } @@ -646,8 +648,8 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); width_ = RoundUp(options_.canvas_width_px(), alignment); height_ = RoundUp(options_.canvas_height_px(), alignment); } - width_gpu_ = RoundUp(width_ * scale_factor, alignment); - height_gpu_ = RoundUp(height_ * scale_factor, alignment); + width_canvas_ = RoundUp(width_ * scale_factor, alignment); + height_canvas_ = RoundUp(height_ * scale_factor, alignment); // Init texture for opencv rendered frame. { @@ -655,8 +657,8 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); glBindTexture(GL_TEXTURE_2D, image_mat_tex_); // TODO // OpenCV only renders to RGB images, not RGBA. Ideally this should be RGBA. - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, width_gpu_, height_gpu_, 0, GL_RGB, - GL_UNSIGNED_BYTE, nullptr); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, width_canvas_, height_canvas_, 0, + GL_RGB, GL_UNSIGNED_BYTE, nullptr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.proto b/mediapipe/calculators/util/annotation_overlay_calculator.proto index b34f2c1ae..339bb2183 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.proto +++ b/mediapipe/calculators/util/annotation_overlay_calculator.proto @@ -50,7 +50,5 @@ message AnnotationOverlayCalculatorOptions { // This can be used to speed up annotation by drawing the annotation on an // intermediate image with a reduced scale, e.g. 0.5 (of the input image width // and height), before resizing and overlaying it on top of the input image. - // Should only be used if *all* render data uses normalized coordinates - // (or absolute coordinates are updated to scale accordingly). 
optional float gpu_scale_factor = 7 [default = 1.0]; } diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index da76a1536..57a500cc5 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -316,6 +316,7 @@ cc_library( "//mediapipe/util/tracking", "//mediapipe/util/tracking:box_tracker", "//mediapipe/util/tracking:tracking_visualization_utilities", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/strings", ], diff --git a/mediapipe/calculators/video/box_tracker_calculator.cc b/mediapipe/calculators/video/box_tracker_calculator.cc index 4fef7cc8e..a56392ee3 100644 --- a/mediapipe/calculators/video/box_tracker_calculator.cc +++ b/mediapipe/calculators/video/box_tracker_calculator.cc @@ -18,6 +18,7 @@ #include #include +#include "absl/container/flat_hash_set.h" #include "absl/container/node_hash_set.h" #include "absl/strings/numbers.h" #include "mediapipe/calculators/video/box_tracker_calculator.pb.h" @@ -238,6 +239,11 @@ class BoxTrackerCalculator : public CalculatorBase { // Queued track time requests. std::vector queued_track_requests_; + // Stores the tracked ids that have been discarded actively, from continuous + // tracking data. It may accumulate across multiple frames. Once consumed, it + // should be cleared immediately. + absl::flat_hash_set actively_discarded_tracked_ids_; + // Add smooth transition between re-acquisition and previous tracked boxes. // `result_box` is the tracking result of one specific timestamp. The smoothed // result will be updated in place. @@ -1144,9 +1150,16 @@ void BoxTrackerCalculator::StreamTrack(const TrackingData& data, CHECK(box_map); CHECK(failed_ids); + // Cache the actively discarded tracked ids from the new tracking data. + for (const int discarded_id : + data.motion_data().actively_discarded_tracked_ids()) { + actively_discarded_tracked_ids_.insert(discarded_id); + } + // Track all existing boxes by one frame. MotionVectorFrame mvf; // Holds motion from current to previous frame. MotionVectorFrameFromTrackingData(data, &mvf); + mvf.actively_discarded_tracked_ids = &actively_discarded_tracked_ids_; if (forward) { MotionVectorFrame mvf_inverted; diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD new file mode 100644 index 000000000..202cee82d --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/BUILD @@ -0,0 +1,62 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
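The BoxTrackerCalculator change above caches actively discarded track ids across frames and expects the consumer to clear the cache once it has been used. A minimal stand-alone sketch of that accumulate-then-consume pattern follows; FrameMotionData and DiscardedIdCache are hypothetical stand-ins for TrackingData::motion_data() and the calculator's member state, not MediaPipe types.

#include <utility>
#include <vector>
#include "absl/container/flat_hash_set.h"

// Hypothetical stand-in for the per-frame ids carried in
// TrackingData::motion_data().actively_discarded_tracked_ids().
struct FrameMotionData {
  std::vector<int> actively_discarded_tracked_ids;
};

class DiscardedIdCache {
 public:
  // Accumulate ids frame by frame; the set de-duplicates repeats.
  void AddFrom(const FrameMotionData& data) {
    for (const int id : data.actively_discarded_tracked_ids) {
      ids_.insert(id);
    }
  }

  // Hand the accumulated ids to the consumer and clear the cache right away,
  // as the calculator comment prescribes.
  absl::flat_hash_set<int> Consume() {
    absl::flat_hash_set<int> out = std::move(ids_);
    ids_ = {};
    return out;
  }

 private:
  absl::flat_hash_set<int> ids_;
};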
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/iris_tracking:iris_tracking_gpu_deps", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +android_binary( + name = "iristrackinggpu", + srcs = glob(["*.java"]), + assets = [ + "//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb", + "//mediapipe/modules/face_landmark:face_landmark.tflite", + "//mediapipe/modules/iris_landmark:iris_landmark.tflite", + "//mediapipe/modules/face_detection:face_detection_front.tflite", + ], + assets_dir = "", + manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.apps.iristrackinggpu", + "appName": "Iris Tracking", + "mainActivity": ".MainActivity", + "cameraFacingFront": "True", + "binaryGraphName": "iris_tracking_gpu.binarypb", + "inputVideoStreamName": "input_video", + "outputVideoStreamName": "output_video", + "flipFramesVertically": "True", + }, + multidex = "native", + deps = [ + ":mediapipe_jni_lib", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java new file mode 100644 index 000000000..a979e698f --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/iristrackinggpu/MainActivity.java @@ -0,0 +1,40 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.iristrackinggpu; + +import android.graphics.SurfaceTexture; +import com.google.mediapipe.framework.Packet; +import java.util.HashMap; +import java.util.Map; + +/** Main activity of MediaPipe iris tracking app. 
*/ +public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { + private static final String TAG = "MainActivity"; + + private static final String FOCAL_LENGTH_STREAM_NAME = "focal_length_pixel"; + + @Override + protected void onCameraStarted(SurfaceTexture surfaceTexture) { + super.onCameraStarted(surfaceTexture); + + float focalLength = cameraHelper.getFocalLengthPixels(); + if (focalLength != Float.MIN_VALUE) { + Packet focalLengthSidePacket = processor.getPacketCreator().createFloat32(focalLength); + Map inputSidePackets = new HashMap<>(); + inputSidePackets.put(FOCAL_LENGTH_STREAM_NAME, focalLengthSidePacket); + processor.setInputSidePackets(inputSidePackets); + } + } +} diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc index bb922d92a..cba751057 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.cc @@ -165,28 +165,53 @@ REGISTER_CALCULATOR(ContentZoomingCalculator); } namespace { -::mediapipe::Status UpdateRanges(const SalientRegion& region, float* xmin, +mediapipe::LocationData::RelativeBoundingBox ShiftDetection( + const mediapipe::LocationData::RelativeBoundingBox& relative_bounding_box, + const float y_offset_percent, const float x_offset_percent) { + auto shifted_bb = relative_bounding_box; + shifted_bb.set_ymin(relative_bounding_box.ymin() + + relative_bounding_box.height() * y_offset_percent); + shifted_bb.set_xmin(relative_bounding_box.xmin() + + relative_bounding_box.width() * x_offset_percent); + return shifted_bb; +} +mediapipe::autoflip::RectF ShiftDetection( + const mediapipe::autoflip::RectF& relative_bounding_box, + const float y_offset_percent, const float x_offset_percent) { + auto shifted_bb = relative_bounding_box; + shifted_bb.set_y(relative_bounding_box.y() + + relative_bounding_box.height() * y_offset_percent); + shifted_bb.set_x(relative_bounding_box.x() + + relative_bounding_box.width() * x_offset_percent); + return shifted_bb; +} +::mediapipe::Status UpdateRanges(const SalientRegion& region, + const float shift_vertical, + const float shift_horizontal, float* xmin, float* xmax, float* ymin, float* ymax) { if (!region.has_location_normalized()) { return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) << "SalientRegion did not have location normalized set."; } - *xmin = fmin(*xmin, region.location_normalized().x()); - *xmax = fmax(*xmax, region.location_normalized().x() + - region.location_normalized().width()); - *ymin = fmin(*ymin, region.location_normalized().y()); - *ymax = fmax(*ymax, region.location_normalized().y() + - region.location_normalized().height()); + auto location = ShiftDetection(region.location_normalized(), shift_vertical, + shift_horizontal); + *xmin = fmin(*xmin, location.x()); + *xmax = fmax(*xmax, location.x() + location.width()); + *ymin = fmin(*ymin, location.y()); + *ymax = fmax(*ymax, location.y() + location.height()); return ::mediapipe::OkStatus(); } ::mediapipe::Status UpdateRanges(const mediapipe::Detection& detection, - float* xmin, float* xmax, float* ymin, - float* ymax) { + const float shift_vertical, + const float shift_horizontal, float* xmin, + float* xmax, float* ymin, float* ymax) { RET_CHECK(detection.location_data().format() == mediapipe::LocationData::RELATIVE_BOUNDING_BOX) << "Face detection input is lacking required relative_bounding_box()"; - const auto& 
location = detection.location_data().relative_bounding_box(); + const auto& location = + ShiftDetection(detection.location_data().relative_bounding_box(), + shift_vertical, shift_horizontal); *xmin = fmin(*xmin, location.xmin()); *xmax = fmax(*xmax, location.xmin() + location.width()); *ymin = fmin(*ymin, location.ymin()); @@ -270,7 +295,9 @@ void MakeStaticFeatures(const int top_border, const int bottom_border, continue; } only_required_found = true; - MP_RETURN_IF_ERROR(UpdateRanges(region, &xmin, &xmax, &ymin, &ymax)); + MP_RETURN_IF_ERROR(UpdateRanges( + region, options_.detection_shift_vertical(), + options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax)); } } @@ -279,7 +306,9 @@ void MakeStaticFeatures(const int top_border, const int bottom_border, cc->Inputs().Tag(kDetections).Get>(); for (const auto& detection : raw_detections) { only_required_found = true; - MP_RETURN_IF_ERROR(UpdateRanges(detection, &xmin, &xmax, &ymin, &ymax)); + MP_RETURN_IF_ERROR(UpdateRanges( + detection, options_.detection_shift_vertical(), + options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax)); } } diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto index bf0b8201b..2634a4afe 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.proto @@ -19,6 +19,7 @@ package mediapipe.autoflip; import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto"; import "mediapipe/framework/calculator.proto"; +// NextTag: 13 message ContentZoomingCalculatorOptions { extend mediapipe.CalculatorOptions { optional ContentZoomingCalculatorOptions ext = 313091992; @@ -44,6 +45,12 @@ message ContentZoomingCalculatorOptions { optional int64 height = 2; } optional Size target_size = 8; + // Amount to shift an input detection as a ratio of the size (positive: + // down/right, negative: up/left). Use a negative value to increase padding + // above/left of an object, positive to increase padding below/right of an + // object. 
+ optional float detection_shift_vertical = 11 [default = 0.0]; + optional float detection_shift_horizontal = 12 [default = 0.0]; // Deprecated parameters optional KinematicOptions kinematic_options = 2 [deprecated = true]; diff --git a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc index ed3a10c9e..e20ebba12 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_test.cc @@ -366,6 +366,45 @@ TEST(ContentZoomingCalculatorTest, ZoomTestNearInsideBorder) { CheckCropRect(42, 42, 83, 83, 1, runner->Outputs().Tag("CROP_RECT").packets); } +TEST(ContentZoomingCalculatorTest, VerticalShift) { + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->set_detection_shift_vertical(0.2); + auto runner = ::absl::make_unique(config); + AddDetection(cv::Rect_(.1, .1, .1, .1), 0, runner.get()); + MP_ASSERT_OK(runner->Run()); + // 1000px * .1 offset + 1000*.1*.1 shift = 170 + CheckCropRect(150, 170, 111, 111, 0, + runner->Outputs().Tag("CROP_RECT").packets); +} + +TEST(ContentZoomingCalculatorTest, HorizontalShift) { + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->set_detection_shift_horizontal(0.2); + auto runner = ::absl::make_unique(config); + AddDetection(cv::Rect_(.1, .1, .1, .1), 0, runner.get()); + MP_ASSERT_OK(runner->Run()); + // 1000px * .1 offset + 1000*.1*.1 shift = 170 + CheckCropRect(170, 150, 111, 111, 0, + runner->Outputs().Tag("CROP_RECT").packets); +} + +TEST(ContentZoomingCalculatorTest, ShiftOutsideBounds) { + auto config = ParseTextProtoOrDie(kConfigD); + auto* options = config.mutable_options()->MutableExtension( + ContentZoomingCalculatorOptions::ext); + options->set_detection_shift_vertical(-0.2); + options->set_detection_shift_horizontal(0.2); + auto runner = ::absl::make_unique(config); + AddDetection(cv::Rect_(.9, 0, .1, .1), 0, runner.get()); + MP_ASSERT_OK(runner->Run()); + CheckCropRect(944, 56, 111, 111, 0, + runner->Outputs().Tag("CROP_RECT").packets); +} + } // namespace } // namespace autoflip diff --git a/mediapipe/examples/desktop/iris_tracking/BUILD b/mediapipe/examples/desktop/iris_tracking/BUILD new file mode 100644 index 000000000..430922115 --- /dev/null +++ b/mediapipe/examples/desktop/iris_tracking/BUILD @@ -0,0 +1,60 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
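The new detection_shift_vertical/detection_shift_horizontal options move a detection by a fraction of its own size before the crop region is computed. The sketch below re-derives the numbers used in the VerticalShift test above (a 0.1 x 0.1 relative box at (0.1, 0.1) on a 1000 px frame, vertical shift 0.2); RelativeBox is a simplified stand-in for LocationData::RelativeBoundingBox.

#include <cstdio>

// Simplified stand-in for mediapipe::LocationData::RelativeBoundingBox.
struct RelativeBox {
  float xmin, ymin, width, height;
};

// Mirrors ShiftDetection(): offset the box by a fraction of its own size
// (positive = down/right, negative = up/left).
RelativeBox Shift(RelativeBox box, float y_offset_percent,
                  float x_offset_percent) {
  box.ymin += box.height * y_offset_percent;
  box.xmin += box.width * x_offset_percent;
  return box;
}

int main() {
  // Same numbers as the VerticalShift test: box (0.1, 0.1, 0.1, 0.1),
  // detection_shift_vertical = 0.2, frame height 1000 px.
  const RelativeBox shifted = Shift({0.1f, 0.1f, 0.1f, 0.1f}, 0.2f, 0.0f);
  // The box center moves from 1000 * (0.1 + 0.05) = 150 px to
  // 1000 * (0.12 + 0.05) = 170 px, matching CheckCropRect(150, 170, ...).
  std::printf("new ymin = %.2f, center = %.0f px\n", shifted.ymin,
              1000.0f * (shifted.ymin + shifted.height / 2.0f));
  return 0;
}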
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//mediapipe/examples:__subpackages__"]) + +cc_binary( + name = "iris_depth_from_image_desktop", + srcs = ["iris_depth_from_image_desktop.cc"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:commandlineflags", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_video", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "//mediapipe/graphs/iris_tracking:iris_depth_cpu_deps", + ], +) + +cc_binary( + name = "iris_tracking_cpu_video_input", + deps = [ + "//mediapipe/examples/desktop:simple_run_graph_main", + "//mediapipe/graphs/iris_tracking:iris_tracking_cpu_video_input_deps", + ], +) + +cc_binary( + name = "iris_tracking_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps", + ], +) + +# Linux only +cc_binary( + name = "iris_tracking_gpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_gpu", + "//mediapipe/graphs/iris_tracking:iris_tracking_gpu_deps", + ], +) diff --git a/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop.cc b/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop.cc new file mode 100644 index 000000000..4cfab621d --- /dev/null +++ b/mediapipe/examples/desktop/iris_tracking/iris_depth_from_image_desktop.cc @@ -0,0 +1,162 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A utility to extract iris depth from a single image of face using the graph +// mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt. +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/commandlineflags.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_video_inc.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" + +constexpr char kInputStream[] = "input_image_bytes"; +constexpr char kOutputImageStream[] = "output_image"; +constexpr char kLeftIrisDepthMmStream[] = "left_iris_depth_mm"; +constexpr char kRightIrisDepthMmStream[] = "right_iris_depth_mm"; +constexpr char kWindowName[] = "MediaPipe"; +constexpr char kCalculatorGraphConfigFile[] = + "mediapipe/graphs/iris_tracking/iris_depth_cpu.pbtxt"; +constexpr float kMicrosPerSecond = 1e6; + +DEFINE_string(input_image_path, "", + "Full path of image to load. 
" + "If not provided, nothing will run."); +DEFINE_string(output_image_path, "", + "Full path of where to save image result (.jpg only). " + "If not provided, show result in a window."); + +namespace { + +::mediapipe::StatusOr ReadFileToString( + const std::string& file_path) { + std::string contents; + MP_RETURN_IF_ERROR(::mediapipe::file::GetContents(file_path, &contents)); + return contents; +} + +::mediapipe::Status ProcessImage( + std::unique_ptr<::mediapipe::CalculatorGraph> graph) { + LOG(INFO) << "Load the image."; + ASSIGN_OR_RETURN(const std::string raw_image, + ReadFileToString(FLAGS_input_image_path)); + + LOG(INFO) << "Start running the calculator graph."; + ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller output_image_poller, + graph->AddOutputStreamPoller(kOutputImageStream)); + ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller left_iris_depth_poller, + graph->AddOutputStreamPoller(kLeftIrisDepthMmStream)); + ASSIGN_OR_RETURN(::mediapipe::OutputStreamPoller right_iris_depth_poller, + graph->AddOutputStreamPoller(kRightIrisDepthMmStream)); + MP_RETURN_IF_ERROR(graph->StartRun({})); + + // Send image packet into the graph. + const size_t fake_timestamp_us = (double)cv::getTickCount() / + (double)cv::getTickFrequency() * + kMicrosPerSecond; + MP_RETURN_IF_ERROR(graph->AddPacketToInputStream( + kInputStream, ::mediapipe::MakePacket(raw_image).At( + ::mediapipe::Timestamp(fake_timestamp_us)))); + + // Get the graph result packets, or stop if that fails. + ::mediapipe::Packet left_iris_depth_packet; + if (!left_iris_depth_poller.Next(&left_iris_depth_packet)) { + return ::mediapipe::UnknownError( + "Failed to get packet from output stream 'left_iris_depth_mm'."); + } + const auto& left_iris_depth_mm = left_iris_depth_packet.Get(); + const int left_iris_depth_cm = std::round(left_iris_depth_mm / 10); + std::cout << "Left Iris Depth: " << left_iris_depth_cm << " cm." << std::endl; + + ::mediapipe::Packet right_iris_depth_packet; + if (!right_iris_depth_poller.Next(&right_iris_depth_packet)) { + return ::mediapipe::UnknownError( + "Failed to get packet from output stream 'right_iris_depth_mm'."); + } + const auto& right_iris_depth_mm = right_iris_depth_packet.Get(); + const int right_iris_depth_cm = std::round(right_iris_depth_mm / 10); + std::cout << "Right Iris Depth: " << right_iris_depth_cm << " cm." + << std::endl; + + ::mediapipe::Packet output_image_packet; + if (!output_image_poller.Next(&output_image_packet)) { + return ::mediapipe::UnknownError( + "Failed to get packet from output stream 'output_image'."); + } + auto& output_frame = output_image_packet.Get<::mediapipe::ImageFrame>(); + + // Convert back to opencv for display or saving. + cv::Mat output_frame_mat = ::mediapipe::formats::MatView(&output_frame); + cv::cvtColor(output_frame_mat, output_frame_mat, cv::COLOR_RGB2BGR); + const bool save_image = !FLAGS_output_image_path.empty(); + if (save_image) { + LOG(INFO) << "Saving image to file..."; + cv::imwrite(FLAGS_output_image_path, output_frame_mat); + } else { + cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1); + cv::imshow(kWindowName, output_frame_mat); + // Press any key to exit. 
+ cv::waitKey(0); + } + + LOG(INFO) << "Shutting down."; + MP_RETURN_IF_ERROR(graph->CloseInputStream(kInputStream)); + return graph->WaitUntilDone(); +} + +::mediapipe::Status RunMPPGraph() { + std::string calculator_graph_config_contents; + MP_RETURN_IF_ERROR(::mediapipe::file::GetContents( + kCalculatorGraphConfigFile, &calculator_graph_config_contents)); + LOG(INFO) << "Get calculator graph config contents: " + << calculator_graph_config_contents; + ::mediapipe::CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie<::mediapipe::CalculatorGraphConfig>( + calculator_graph_config_contents); + + LOG(INFO) << "Initialize the calculator graph."; + std::unique_ptr<::mediapipe::CalculatorGraph> graph = + absl::make_unique<::mediapipe::CalculatorGraph>(); + MP_RETURN_IF_ERROR(graph->Initialize(config)); + + const bool load_image = !FLAGS_input_image_path.empty(); + if (load_image) { + return ProcessImage(std::move(graph)); + } else { + return ::mediapipe::InvalidArgumentError("Missing image file."); + } +} + +} // namespace + +int main(int argc, char** argv) { + google::InitGoogleLogging(argv[0]); + gflags::ParseCommandLineFlags(&argc, &argv, true); + ::mediapipe::Status run_status = RunMPPGraph(); + if (!run_status.ok()) { + LOG(ERROR) << "Failed to run the graph: " << run_status.message(); + return EXIT_FAILURE; + } else { + LOG(INFO) << "Success!"; + } + return EXIT_SUCCESS; +} diff --git a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard index e3bd912a4..20845c12f 100644 --- a/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard +++ b/mediapipe/examples/ios/edgedetectiongpu/Base.lproj/Main.storyboard
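For completeness, a hedged sketch of the desktop analogue of what the iris-tracking MainActivity above does: pass the camera focal length (in pixels) to a CalculatorGraph as an input side packet before starting the run. It assumes a graph that declares an input side packet named "focal_length_pixel" (the name MainActivity uses); nothing here is taken from the diff itself.

// Minimal sketch: start a CalculatorGraph with a focal-length side packet,
// mirroring MainActivity.onCameraStarted(). The 2357.48f value is the
// Pixel 2 front-camera figure from the calculator test; a real app would
// query its own camera instead.
#include <map>
#include <string>

#include "mediapipe/framework/calculator_framework.h"

::mediapipe::Status StartGraphWithFocalLength(
    ::mediapipe::CalculatorGraph* graph) {
  std::map<std::string, ::mediapipe::Packet> input_side_packets;
  input_side_packets["focal_length_pixel"] =
      ::mediapipe::MakePacket<float>(2357.48f);
  return graph->StartRun(input_side_packets);
}

The iris_depth_from_image_desktop tool above, by contrast, needs no such side packet: it is driven by its --input_image_path and --output_image_path flags and starts its graph with StartRun({}), with the focal length presumably recovered from the image's EXIF data via the ImageFilePropertiesCalculator added earlier in this change.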