Merge branch 'google:master' into master

This commit is contained in:
Tarun Jain 2023-03-02 21:01:21 +05:30 committed by GitHub
commit 6e7018b826
118 changed files with 11027 additions and 643 deletions

View File

@ -19,6 +19,17 @@ ML solutions for live and streaming media.
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png) ![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable* ***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
----
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023.*
*This notice and web page will be removed on April 3, 2023.*
----
## ML solutions in MediaPipe ## ML solutions in MediaPipe
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic Face Detection | Face Mesh | Iris | Hands | Pose | Holistic

View File

@ -0,0 +1,13 @@
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta http-equiv="refresh" content="0;url={{ page.target }}"/>
<link rel="canonical" href="{{ page.target }}"/>
<title>Redirecting</title>
</head>
<body>
<p>This page now lives on https://developers.google.com/mediapipe/. If you aren't automatically
redirected, follow this
<a href="{{ page.target }}">link</a>.</p>
</body>
</html>

View File

@ -593,3 +593,105 @@ CalculatorGraphConfig BuildGraph() {
return graph.GetConfig(); return graph.GetConfig();
} }
``` ```
### Separate nodes for better readability
```c++ {.bad}
CalculatorGraphConfig BuildGraph() {
  Graph graph;
  // Inputs.
  Stream<A> a = graph.In(0).Cast<A>();
  auto& node1 = graph.AddNode("Calculator1");
  a.ConnectTo(node1.In("INPUT"));
  Stream<B> b = node1.Out("OUTPUT").Cast<B>();
  auto& node2 = graph.AddNode("Calculator2");
  b.ConnectTo(node2.In("INPUT"));
  Stream<C> c = node2.Out("OUTPUT").Cast<C>();
  auto& node3 = graph.AddNode("Calculator3");
  b.ConnectTo(node3.In("INPUT_B"));
  c.ConnectTo(node3.In("INPUT_C"));
  Stream<D> d = node3.Out("OUTPUT").Cast<D>();
  auto& node4 = graph.AddNode("Calculator4");
  b.ConnectTo(node4.In("INPUT_B"));
  c.ConnectTo(node4.In("INPUT_C"));
  d.ConnectTo(node4.In("INPUT_D"));
  Stream<E> e = node4.Out("OUTPUT").Cast<E>();
  // Outputs.
  b.SetName("b").ConnectTo(graph.Out(0));
  c.SetName("c").ConnectTo(graph.Out(1));
  d.SetName("d").ConnectTo(graph.Out(2));
  e.SetName("e").ConnectTo(graph.Out(3));
  return graph.GetConfig();
}
```
In the code above, it can be hard to tell where each node begins and ends. To
improve this and help readers of your code, you can simply add blank lines
before and after each node:
```c++ {.good}
CalculatorGraphConfig BuildGraph() {
  Graph graph;

  // Inputs.
  Stream<A> a = graph.In(0).Cast<A>();

  auto& node1 = graph.AddNode("Calculator1");
  a.ConnectTo(node1.In("INPUT"));
  Stream<B> b = node1.Out("OUTPUT").Cast<B>();

  auto& node2 = graph.AddNode("Calculator2");
  b.ConnectTo(node2.In("INPUT"));
  Stream<C> c = node2.Out("OUTPUT").Cast<C>();

  auto& node3 = graph.AddNode("Calculator3");
  b.ConnectTo(node3.In("INPUT_B"));
  c.ConnectTo(node3.In("INPUT_C"));
  Stream<D> d = node3.Out("OUTPUT").Cast<D>();

  auto& node4 = graph.AddNode("Calculator4");
  b.ConnectTo(node4.In("INPUT_B"));
  c.ConnectTo(node4.In("INPUT_C"));
  d.ConnectTo(node4.In("INPUT_D"));
  Stream<E> e = node4.Out("OUTPUT").Cast<E>();

  // Outputs.
  b.SetName("b").ConnectTo(graph.Out(0));
  c.SetName("c").ConnectTo(graph.Out(1));
  d.SetName("d").ConnectTo(graph.Out(2));
  e.SetName("e").ConnectTo(graph.Out(3));

  return graph.GetConfig();
}
```
The above representation also matches the `CalculatorGraphConfig` proto
representation more closely.
If you extract nodes into utility functions, they are already scoped within
functions and it is clear where they begin and end, so it is perfectly fine to
have:
```c++ {.good}
CalculatorGraphConfig BuildGraph() {
  Graph graph;

  // Inputs.
  Stream<A> a = graph.In(0).Cast<A>();

  Stream<B> b = RunCalculator1(a, graph);
  Stream<C> c = RunCalculator2(b, graph);
  Stream<D> d = RunCalculator3(b, c, graph);
  Stream<E> e = RunCalculator4(b, c, d, graph);

  // Outputs.
  b.SetName("b").ConnectTo(graph.Out(0));
  c.SetName("c").ConnectTo(graph.Out(1));
  d.SetName("d").ConnectTo(graph.Out(2));
  e.SetName("e").ConnectTo(graph.Out(3));

  return graph.GetConfig();
}
```
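For illustration only, a node-wrapping utility such as the `RunCalculator1` used
above might look like the following sketch; the calculator name, stream tags,
and types are assumptions here, so adapt them to your actual calculator:
```c++
// A minimal sketch of a utility function that wraps a single node, assuming a
// hypothetical "Calculator1" that consumes an A on tag "INPUT" and produces a
// B on tag "OUTPUT".
Stream<B> RunCalculator1(Stream<A> a, Graph& graph) {
  auto& node = graph.AddNode("Calculator1");
  a.ConnectTo(node.In("INPUT"));
  return node.Out("OUTPUT").Cast<B>();
}
```
Each such function keeps the wiring for one node in one place, so the top-level
`BuildGraph()` reads as a simple dataflow description.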

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/calculators
title: Calculators title: Calculators
parent: Framework Concepts parent: Framework Concepts
nav_order: 1 nav_order: 1

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/overview
title: Framework Concepts title: Framework Concepts
nav_order: 5 nav_order: 5
has_children: true has_children: true

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/gpu
title: GPU title: GPU
parent: Framework Concepts parent: Framework Concepts
nav_order: 5 nav_order: 5

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/graphs
title: Graphs title: Graphs
parent: Framework Concepts parent: Framework Concepts
nav_order: 2 nav_order: 2

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/packets
title: Packets title: Packets
parent: Framework Concepts parent: Framework Concepts
nav_order: 3 nav_order: 3

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/realtime_streams
title: Real-time Streams title: Real-time Streams
parent: Framework Concepts parent: Framework Concepts
nav_order: 6 nav_order: 6

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/synchronization
title: Synchronization title: Synchronization
parent: Framework Concepts parent: Framework Concepts
nav_order: 4 nav_order: 4

View File

@ -13,6 +13,17 @@ nav_order: 2
{:toc} {:toc}
--- ---
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*
*This notice and web page will be removed on April 3, 2023.*
----
MediaPipe Android Solution APIs (currently in alpha) are available in: MediaPipe Android Solution APIs (currently in alpha) are available in:
* [MediaPipe Face Detection](../solutions/face_detection#android-solution-api) * [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)

View File

@ -12,6 +12,17 @@ nav_exclude: true
{:toc} {:toc}
--- ---
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*
*This notice and web page will be removed on April 3, 2023.*
----
### Android ### Android
Please see these [instructions](./android.md). Please see these [instructions](./android.md).

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/faq
title: FAQ title: FAQ
parent: Getting Started parent: Getting Started
nav_order: 9 nav_order: 9
@ -59,7 +60,7 @@ The second approach allows up to [`max_in_flight`] invocations of the
packets from [`CalculatorBase::Process`] are automatically ordered by timestamp packets from [`CalculatorBase::Process`] are automatically ordered by timestamp
before they are passed along to downstream calculators. before they are passed along to downstream calculators.
With either aproach, you must be aware that the calculator running in parallel With either approach, you must be aware that the calculator running in parallel
cannot maintain internal state in the same way as a normal sequential cannot maintain internal state in the same way as a normal sequential
calculator. calculator.

View File

@ -11,3 +11,14 @@ has_children: true
1. TOC 1. TOC
{:toc} {:toc}
--- ---
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*
*This notice and web page will be removed on April 3, 2023.*
----

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/gpu_support
title: GPU Support title: GPU Support
parent: Getting Started parent: Getting Started
nav_order: 7 nav_order: 7

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/help
title: Getting Help title: Getting Help
parent: Getting Started parent: Getting Started
nav_order: 8 nav_order: 8
@ -37,8 +38,8 @@ If you open a GitHub issue, here is our policy:
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: - **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**: - **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
- **Bazel version**: - **Bazel version**:
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev enviroment)**: - **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev environment)**:
- **Xcode & Tulsi version (if issue is related to building in mobile dev enviroment)**: - **Xcode & Tulsi version (if issue is related to building in mobile dev environment)**:
- **Exact steps to reproduce**: - **Exact steps to reproduce**:
### Describe the problem ### Describe the problem

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/install
title: Installation title: Installation
parent: Getting Started parent: Getting Started
nav_order: 6 nav_order: 6

View File

@ -12,6 +12,17 @@ nav_order: 4
{:toc} {:toc}
--- ---
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*
*This notice and web page will be removed on April 3, 2023.*
----
## Ready-to-use JavaScript Solutions ## Ready-to-use JavaScript Solutions
MediaPipe currently offers the following solutions: MediaPipe currently offers the following solutions:
@ -33,7 +44,7 @@ snippets.
| Browser | Platform | Notes | | Browser | Platform | Notes |
| ------- | ----------------------- | -------------------------------------- | | ------- | ----------------------- | -------------------------------------- |
| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuschia | | Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia |
| | | unsupported. | | | | unsupported. |
| Chrome | iOS | Camera unavailable in Chrome on iOS. | | Chrome | iOS | Camera unavailable in Chrome on iOS. |
| Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / | | Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / |

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/troubleshooting
title: Troubleshooting title: Troubleshooting
parent: Getting Started parent: Getting Started
nav_order: 10 nav_order: 10
@ -65,7 +66,7 @@ WARNING: Download from https://storage.googleapis.com/mirror.tensorflow.org/gith
``` ```
usually indicates that Bazel fails to download necessary dependency repositories usually indicates that Bazel fails to download necessary dependency repositories
that MediaPipe needs. MedaiPipe has several dependency repositories that are that MediaPipe needs. MediaPipe has several dependency repositories that are
hosted by Google sites. In some regions, you may need to set up a network proxy hosted by Google sites. In some regions, you may need to set up a network proxy
or use a VPN to access those resources. You may also need to append or use a VPN to access those resources. You may also need to append
`--host_jvm_args "-DsocksProxyHost=<ip address> -DsocksProxyPort=<port number>"` `--host_jvm_args "-DsocksProxyHost=<ip address> -DsocksProxyPort=<port number>"`

View File

@ -19,6 +19,17 @@ ML solutions for live and streaming media.
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png) ![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable* ***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
----
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023.*
*This notice and web page will be removed on April 3, 2023.*
----
## ML solutions in MediaPipe ## ML solutions in MediaPipe
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic Face Detection | Face Mesh | Iris | Hands | Pose | Holistic

View File

@ -18,6 +18,16 @@ nav_order: 14
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This

View File

@ -18,6 +18,16 @@ nav_order: 10
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
MediaPipe Box Tracking has been powering real-time tracking in MediaPipe Box Tracking has been powering real-time tracking in

View File

@ -18,6 +18,16 @@ nav_order: 1
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
MediaPipe Face Detection is an ultrafast face detection solution that comes with MediaPipe Face Detection is an ultrafast face detection solution that comes with

View File

@ -18,6 +18,16 @@ nav_order: 2
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
@ -133,7 +143,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
detection in the screen coordinate space: the X- and Y- coordinates are detection in the screen coordinate space: the X- and Y- coordinates are
normalized screen coordinates, while the Z coordinate is relative and is scaled normalized screen coordinates, while the Z coordinate is relative and is scaled
as the X coodinate under the as the X coordinate under the
[weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection). [weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection).
This format is well-suited for some applications, however it does not directly This format is well-suited for some applications, however it does not directly
enable the full spectrum of augmented reality (AR) features like aligning a enable the full spectrum of augmented reality (AR) features like aligning a

View File

@ -18,6 +18,16 @@ nav_order: 8
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
![hair_segmentation_android_gpu_gif](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu.gif) ![hair_segmentation_android_gpu_gif](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu.gif)
## Example Apps ## Example Apps

View File

@ -18,6 +18,16 @@ nav_order: 4
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
The ability to perceive the shape and motion of hands can be a vital component The ability to perceive the shape and motion of hands can be a vital component

View File

@ -18,6 +18,16 @@ nav_order: 6
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
Live perception of simultaneous [human pose](./pose.md), Live perception of simultaneous [human pose](./pose.md),

View File

@ -18,6 +18,16 @@ nav_order: 11
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
Augmented Reality (AR) technology creates fun, engaging, and immersive user Augmented Reality (AR) technology creates fun, engaging, and immersive user

View File

@ -18,6 +18,16 @@ nav_order: 3
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
A wide range of real-world applications, including computational photography A wide range of real-world applications, including computational photography
@ -38,7 +48,7 @@ camera, in real-time, without the need for specialized hardware. Through use of
iris landmarks, the solution is also able to determine the metric distance iris landmarks, the solution is also able to determine the metric distance
between the subject and the camera with relative error less than 10%. Note that between the subject and the camera with relative error less than 10%. Note that
iris tracking does not infer the location at which people are looking, nor does iris tracking does not infer the location at which people are looking, nor does
it provide any form of identity recognition. With the cross-platfrom capability it provide any form of identity recognition. With the cross-platform capability
of the MediaPipe framework, MediaPipe Iris can run on most modern of the MediaPipe framework, MediaPipe Iris can run on most modern
[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the [mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
[web](#web). [web](#web).
@ -99,7 +109,7 @@ You can also find more details in this
### Iris Landmark Model ### Iris Landmark Model
The iris model takes an image patch of the eye region and estimates both the eye The iris model takes an image patch of the eye region and estimates both the eye
landmarks (along the eyelid) and iris landmarks (along ths iris contour). You landmarks (along the eyelid) and iris landmarks (along this iris contour). You
can find more details in this [paper](https://arxiv.org/abs/2006.11341). can find more details in this [paper](https://arxiv.org/abs/2006.11341).
![iris_tracking_eye_and_iris_landmarks.png](https://mediapipe.dev/images/mobile/iris_tracking_eye_and_iris_landmarks.png) | ![iris_tracking_eye_and_iris_landmarks.png](https://mediapipe.dev/images/mobile/iris_tracking_eye_and_iris_landmarks.png) |

View File

@ -18,6 +18,16 @@ nav_order: 13
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
MediaPipe KNIFT is a template-based feature matching solution using KNIFT MediaPipe KNIFT is a template-based feature matching solution using KNIFT

View File

@ -18,6 +18,16 @@ nav_order: 15
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
MediaPipe is a useful and general framework for media processing that can MediaPipe is a useful and general framework for media processing that can
@ -85,7 +95,7 @@ process new data sets, in the documentation of
MediaSequence uses SequenceExamples as the format of both inputs and MediaSequence uses SequenceExamples as the format of both inputs and
outputs. Annotations are encoded as inputs in a SequenceExample of metadata outputs. Annotations are encoded as inputs in a SequenceExample of metadata
that defines the labels and the path to the cooresponding video file. This that defines the labels and the path to the corresponding video file. This
metadata is passed as input to the C++ `media_sequence_demo` binary, and the metadata is passed as input to the C++ `media_sequence_demo` binary, and the
output is a SequenceExample filled with images and annotations ready for output is a SequenceExample filled with images and annotations ready for
model training. model training.

View File

@ -12,6 +12,20 @@ nav_order: 30
{:toc} {:toc}
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
as of March 1, 2023. All other
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
to a new MediaPipe Solution. The code repository and prebuilt binaries for all
MediaPipe Legacy Solutions will continue to be provided on an as-is basis.
We encourage you to check out the new MediaPipe Solutions at:
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*
*This notice and web page will be removed on April 3, 2023.*
----
### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection) ### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)
* Short-range model (best for faces within 2 meters from the camera): * Short-range model (best for faces within 2 meters from the camera):

View File

@ -18,6 +18,16 @@ nav_order: 9
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
![object_detection_android_gpu.gif](https://mediapipe.dev/images/mobile/object_detection_android_gpu.gif) ![object_detection_android_gpu.gif](https://mediapipe.dev/images/mobile/object_detection_android_gpu.gif)
## Example Apps ## Example Apps

View File

@ -18,6 +18,16 @@ nav_order: 12
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
MediaPipe Objectron is a mobile real-time 3D object detection solution for MediaPipe Objectron is a mobile real-time 3D object detection solution for
@ -170,7 +180,7 @@ and a
The detection subgraph performs ML inference only once every few frames to The detection subgraph performs ML inference only once every few frames to
reduce computation load, and decodes the output tensor to a FrameAnnotation that reduce computation load, and decodes the output tensor to a FrameAnnotation that
contains nine keypoints: the 3D bounding box's center and its eight vertices. contains nine keypoints: the 3D bounding box's center and its eight vertices.
The tracking subgraph runs every frame, using the box traker in The tracking subgraph runs every frame, using the box tracker in
[MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly [MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly
enclosing the projection of the 3D bounding box, and lifts the tracked 2D enclosing the projection of the 3D bounding box, and lifts the tracked 2D
keypoints to 3D with keypoints to 3D with
@ -613,7 +623,7 @@ z_ndc = 1 / Z
### Pixel Space ### Pixel Space
In this API we set upper-left coner of an image as the origin of pixel In this API we set upper-left corner of an image as the origin of pixel
coordinate. One can convert from NDC to pixel space as follows: coordinate. One can convert from NDC to pixel space as follows:
``` ```

View File

@ -20,6 +20,16 @@ nav_order: 5
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
Human pose estimation from video plays a critical role in various applications Human pose estimation from video plays a critical role in various applications

View File

@ -19,6 +19,16 @@ nav_order: 1
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
One of the applications One of the applications

View File

@ -18,6 +18,16 @@ nav_order: 7
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
## Overview ## Overview
*Fig 1. Example of MediaPipe Selfie Segmentation.* | *Fig 1. Example of MediaPipe Selfie Segmentation.* |

View File

@ -13,7 +13,21 @@ has_toc: false
{:toc} {:toc}
--- ---
Note: These solutions are no longer actively maintained. Consider using or migrating to the new [MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide). **Attention:** *Thank you for your interest in MediaPipe Solutions. We have
ended support for
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
as of March 1, 2023. All other
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
to a new MediaPipe Solution. The
[code repository](https://github.com/google/mediapipe/tree/master/mediapipe)
and prebuilt binaries for all MediaPipe Legacy Solutions will continue to
be provided on an as-is basis. We encourage you to check out the new MediaPipe
Solutions at:
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*
*This notice and web page will be removed on June 1, 2023.*
----
MediaPipe offers open source cross-platform, customizable ML solutions for live MediaPipe offers open source cross-platform, customizable ML solutions for live
and streaming media. and streaming media.

View File

@ -18,6 +18,16 @@ nav_order: 16
</details> </details>
--- ---
**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*
*This notice and web page will be removed on April 3, 2023.*
----
MediaPipe is a useful and general framework for media processing that can assist MediaPipe is a useful and general framework for media processing that can assist
with research, development, and deployment of ML models. This example focuses on with research, development, and deployment of ML models. This example focuses on
model development by demonstrating how to prepare training data and do model model development by demonstrating how to prepare training data and do model

View File

@ -1,5 +1,6 @@
--- ---
layout: default layout: forward
target: https://developers.google.com/mediapipe/framework/tools/visualizer
title: Visualizer title: Visualizer
parent: Tools parent: Tools
nav_order: 1 nav_order: 1

View File

@ -48,7 +48,6 @@ class MergeToVectorCalculator : public Node {
} }
absl::Status Process(CalculatorContext* cc) { absl::Status Process(CalculatorContext* cc) {
const int input_num = kIn(cc).Count();
std::vector<T> output_vector; std::vector<T> output_vector;
for (auto it = kIn(cc).begin(); it != kIn(cc).end(); it++) { for (auto it = kIn(cc).begin(); it != kIn(cc).end(); it++) {
const auto& elem = *it; const auto& elem = *it;

View File

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# #
load("@bazel_skylib//lib:selects.bzl", "selects")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type") load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type")
@ -23,6 +24,14 @@ package(
licenses(["notice"]) licenses(["notice"])
selects.config_setting_group(
name = "ios_or_disable_gpu",
match_any = [
"//mediapipe/gpu:disable_gpu",
"//mediapipe:ios",
],
)
mediapipe_proto_library( mediapipe_proto_library(
name = "detection_proto", name = "detection_proto",
srcs = ["detection.proto"], srcs = ["detection.proto"],
@ -336,9 +345,7 @@ cc_library(
"//conditions:default": [ "//conditions:default": [
"//mediapipe/gpu:gl_texture_buffer", "//mediapipe/gpu:gl_texture_buffer",
], ],
"//mediapipe:ios": [ "ios_or_disable_gpu": [],
],
"//mediapipe/gpu:disable_gpu": [],
}) + select({ }) + select({
"//conditions:default": [], "//conditions:default": [],
"//mediapipe:apple": [ "//mediapipe:apple": [

View File

@ -18,15 +18,16 @@
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h" #include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
namespace mediapipe { namespace mediapipe {
namespace tool { namespace tool {
absl::Status StatusInvalid(const std::string& message) { absl::Status StatusInvalid(absl::string_view message) {
return absl::Status(absl::StatusCode::kInvalidArgument, message); return absl::Status(absl::StatusCode::kInvalidArgument, message);
} }
absl::Status StatusFail(const std::string& message) { absl::Status StatusFail(absl::string_view message) {
return absl::Status(absl::StatusCode::kUnknown, message); return absl::Status(absl::StatusCode::kUnknown, message);
} }
@ -35,12 +36,12 @@ absl::Status StatusStop() {
"mediapipe::tool::StatusStop()"); "mediapipe::tool::StatusStop()");
} }
absl::Status AddStatusPrefix(const std::string& prefix, absl::Status AddStatusPrefix(absl::string_view prefix,
const absl::Status& status) { const absl::Status& status) {
return absl::Status(status.code(), absl::StrCat(prefix, status.message())); return absl::Status(status.code(), absl::StrCat(prefix, status.message()));
} }
absl::Status CombinedStatus(const std::string& general_comment, absl::Status CombinedStatus(absl::string_view general_comment,
const std::vector<absl::Status>& statuses) { const std::vector<absl::Status>& statuses) {
// The final error code is absl::StatusCode::kUnknown if not all // The final error code is absl::StatusCode::kUnknown if not all
// the error codes are the same. Otherwise it is the same error code // the error codes are the same. Otherwise it is the same error code

View File

@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include "absl/base/macros.h" #include "absl/base/macros.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status.h"
namespace mediapipe { namespace mediapipe {
@ -34,16 +35,16 @@ absl::Status StatusStop();
// Return a status which signals an invalid initial condition (for // Return a status which signals an invalid initial condition (for
// example an InputSidePacket does not include all necessary fields). // example an InputSidePacket does not include all necessary fields).
ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.") ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.")
absl::Status StatusInvalid(const std::string& error_message); absl::Status StatusInvalid(absl::string_view error_message);
// Return a status which signals that something unexpectedly failed. // Return a status which signals that something unexpectedly failed.
ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.") ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.")
absl::Status StatusFail(const std::string& error_message); absl::Status StatusFail(absl::string_view error_message);
// Prefixes the given string to the error message in status. // Prefixes the given string to the error message in status.
// This function should be considered internal to the framework. // This function should be considered internal to the framework.
// TODO Replace usage of AddStatusPrefix with util::Annotate(). // TODO Replace usage of AddStatusPrefix with util::Annotate().
absl::Status AddStatusPrefix(const std::string& prefix, absl::Status AddStatusPrefix(absl::string_view prefix,
const absl::Status& status); const absl::Status& status);
// Combine a vector of absl::Status into a single composite status. // Combine a vector of absl::Status into a single composite status.
@ -51,7 +52,7 @@ absl::Status AddStatusPrefix(const std::string& prefix,
// will be returned. // will be returned.
// This function should be considered internal to the framework. // This function should be considered internal to the framework.
// TODO Move this function to somewhere with less visibility. // TODO Move this function to somewhere with less visibility.
absl::Status CombinedStatus(const std::string& general_comment, absl::Status CombinedStatus(absl::string_view general_comment,
const std::vector<absl::Status>& statuses); const std::vector<absl::Status>& statuses);
} // namespace tool } // namespace tool

View File

@ -15,7 +15,9 @@
package com.google.mediapipe.components; package com.google.mediapipe.components;
import static java.lang.Math.max; import static java.lang.Math.max;
import static java.lang.Math.min;
import android.graphics.Bitmap;
import android.graphics.SurfaceTexture; import android.graphics.SurfaceTexture;
import android.opengl.GLES11Ext; import android.opengl.GLES11Ext;
import android.opengl.GLES20; import android.opengl.GLES20;
@ -25,9 +27,12 @@ import android.util.Log;
import com.google.mediapipe.framework.TextureFrame; import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.glutil.CommonShaders; import com.google.mediapipe.glutil.CommonShaders;
import com.google.mediapipe.glutil.ShaderUtil; import com.google.mediapipe.glutil.ShaderUtil;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer; import java.nio.FloatBuffer;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import javax.microedition.khronos.egl.EGLConfig; import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.opengles.GL10; import javax.microedition.khronos.opengles.GL10;
@ -44,6 +49,13 @@ import javax.microedition.khronos.opengles.GL10;
* {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}). * {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}).
*/ */
public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
/**
* Listener for Bitmap capture requests.
*/
public interface BitmapCaptureListener {
void onBitmapCaptured(Bitmap result);
}
private static final String TAG = "DemoRenderer"; private static final String TAG = "DemoRenderer";
private static final int ATTRIB_POSITION = 1; private static final int ATTRIB_POSITION = 1;
private static final int ATTRIB_TEXTURE_COORDINATE = 2; private static final int ATTRIB_TEXTURE_COORDINATE = 2;
@ -56,12 +68,32 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
private int frameUniform; private int frameUniform;
private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES; private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES;
private int textureTransformUniform; private int textureTransformUniform;
private boolean shouldFitToWidth = false;
// Controls the alignment between frame size and surface size, 0.5f default is centered. // Controls the alignment between frame size and surface size, 0.5f default is centered.
private float alignmentHorizontal = 0.5f; private float alignmentHorizontal = 0.5f;
private float alignmentVertical = 0.5f; private float alignmentVertical = 0.5f;
private float[] textureTransformMatrix = new float[16]; private float[] textureTransformMatrix = new float[16];
private SurfaceTexture surfaceTexture = null; private SurfaceTexture surfaceTexture = null;
private final AtomicReference<TextureFrame> nextFrame = new AtomicReference<>(); private final AtomicReference<TextureFrame> nextFrame = new AtomicReference<>();
private final AtomicBoolean captureNextFrameBitmap = new AtomicBoolean();
private BitmapCaptureListener bitmapCaptureListener;
/**
* Sets the {@link BitmapCaptureListener}.
*/
public void setBitmapCaptureListener(BitmapCaptureListener bitmapCaptureListener) {
this.bitmapCaptureListener = bitmapCaptureListener;
}
/**
* Request to capture Bitmap of the next frame.
*
* The result will be provided to the {@link BitmapCaptureListener} if one is set. Please note
* this is an expensive operation and the result may not be available for a while.
*/
public void captureNextFrameBitmap() {
captureNextFrameBitmap.set(true);
}
@Override @Override
public void onSurfaceCreated(GL10 gl, EGLConfig config) { public void onSurfaceCreated(GL10 gl, EGLConfig config) {
@ -147,6 +179,31 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4); GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
ShaderUtil.checkGlError("glDrawArrays"); ShaderUtil.checkGlError("glDrawArrays");
// Capture Bitmap if requested.
BitmapCaptureListener bitmapCaptureListener = this.bitmapCaptureListener;
if (captureNextFrameBitmap.getAndSet(false) && bitmapCaptureListener != null) {
int bitmapSize = surfaceWidth * surfaceHeight;
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bitmapSize * 4);
byteBuffer.order(ByteOrder.nativeOrder());
GLES20.glReadPixels(
0, 0, surfaceWidth, surfaceHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, byteBuffer);
int[] pixelBuffer = new int[bitmapSize];
byteBuffer.asIntBuffer().get(pixelBuffer);
for (int i = 0; i < bitmapSize; i++) {
// Swap R and B channels.
pixelBuffer[i] =
(pixelBuffer[i] & 0xff00ff00)
| ((pixelBuffer[i] & 0x000000ff) << 16)
| ((pixelBuffer[i] & 0x00ff0000) >> 16);
}
Bitmap bitmap = Bitmap.createBitmap(surfaceWidth, surfaceHeight, Bitmap.Config.ARGB_8888);
bitmap.setPixels(
pixelBuffer, /* offset= */bitmapSize - surfaceWidth, /* stride= */-surfaceWidth,
/* x= */0, /* y= */0, surfaceWidth, surfaceHeight);
bitmapCaptureListener.onBitmapCaptured(bitmap);
}
GLES20.glBindTexture(textureTarget, 0); GLES20.glBindTexture(textureTarget, 0);
ShaderUtil.checkGlError("unbind surfaceTexture"); ShaderUtil.checkGlError("unbind surfaceTexture");
@ -158,13 +215,17 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
// TODO: compute scale from surfaceTexture size. // TODO: compute scale from surfaceTexture size.
float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f; float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f;
float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f; float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f;
// Whichever of the two scales is greater corresponds to the dimension where the image // By default whichever of the two scales is greater corresponds to the dimension where the
// is proportionally smaller than the view. Dividing both scales by that number results // image is proportionally smaller than the view. Dividing both scales by that number results
// in that dimension having scale 1.0, and thus touching the edges of the view, while the // in that dimension having scale 1.0, and thus touching the edges of the view, while the
// other is cropped proportionally. // other is cropped proportionally. If shouldFitToWidth is set as true, use the min scale
float maxScale = max(scaleWidth, scaleHeight); // if frame width is greater than frame height.
scaleWidth /= maxScale; float scale = max(scaleWidth, scaleHeight);
scaleHeight /= maxScale; if (shouldFitToWidth && (frameWidth > frameHeight)) {
scale = min(scaleWidth, scaleHeight);
}
scaleWidth /= scale;
scaleHeight /= scale;
// Alignment controls where the visible section is placed within the full camera frame, with // Alignment controls where the visible section is placed within the full camera frame, with
// (0, 0) being the bottom left, and (1, 1) being the top right. // (0, 0) being the bottom left, and (1, 1) being the top right.
@ -232,6 +293,11 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
frameHeight = height; frameHeight = height;
} }
/** Supports fit to width when the frame width is greater than the frame height. */
public void setShouldFitToWidth(boolean shouldFitToWidth) {
this.shouldFitToWidth = shouldFitToWidth;
}
/** /**
* When the aspect ratios between the camera frame and the surface size are mismatched, this * When the aspect ratios between the camera frame and the surface size are mismatched, this
* controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning * controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning

View File

@ -35,7 +35,6 @@ cc_library(
"//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto", "//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto",
"//mediapipe/tasks/cc/components/processors:embedder_options", "//mediapipe/tasks/cc/components/processors:embedder_options",
"//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto", "//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto",
"//mediapipe/tasks/cc/components/utils:cosine_similarity",
"//mediapipe/tasks/cc/core:base_options", "//mediapipe/tasks/cc/core:base_options",
"//mediapipe/tasks/cc/core:task_runner", "//mediapipe/tasks/cc/core:task_runner",
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",

View File

@ -29,7 +29,6 @@ limitations under the License.
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h" #include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
#include "mediapipe/tasks/cc/components/processors/embedder_options.h" #include "mediapipe/tasks/cc/components/processors/embedder_options.h"
#include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h" #include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h"
#include "mediapipe/tasks/cc/components/utils/cosine_similarity.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h" #include "mediapipe/tasks/cc/core/task_runner.h"
#include "tensorflow/lite/core/api/op_resolver.h" #include "tensorflow/lite/core/api/op_resolver.h"
@ -147,10 +146,4 @@ absl::Status AudioEmbedder::EmbedAsync(Matrix audio_block,
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}); .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
} }
absl::StatusOr<double> AudioEmbedder::CosineSimilarity(
const components::containers::Embedding& u,
const components::containers::Embedding& v) {
return components::utils::CosineSimilarity(u, v);
}
} // namespace mediapipe::tasks::audio::audio_embedder } // namespace mediapipe::tasks::audio::audio_embedder

View File

@ -125,16 +125,6 @@ class AudioEmbedder : core::BaseAudioTaskApi {
// Shuts down the AudioEmbedder when all works are done. // Shuts down the AudioEmbedder when all works are done.
absl::Status Close() { return runner_->Close(); } absl::Status Close() { return runner_->Close(); }
// Utility function to compute cosine similarity [1] between two embeddings.
// May return an InvalidArgumentError if e.g. the embeddings are of different
// types (quantized vs. float), have different sizes, or have a an L2-norm of
// 0.
//
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
static absl::StatusOr<double> CosineSimilarity(
const components::containers::Embedding& u,
const components::containers::Embedding& v);
}; };
} // namespace mediapipe::tasks::audio::audio_embedder } // namespace mediapipe::tasks::audio::audio_embedder

View File

@ -54,8 +54,6 @@ constexpr char kModelWithMetadata[] = "yamnet_embedding_metadata.tflite";
constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav"; constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav";
constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav"; constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav";
constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav"; constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav";
constexpr float kSpeechSimilarities[] = {0.985359, 0.994349, 0.993227, 0.996658,
0.996384};
constexpr int kMilliSecondsPerSecond = 1000; constexpr int kMilliSecondsPerSecond = 1000;
constexpr int kYamnetNumOfAudioSamples = 15600; constexpr int kYamnetNumOfAudioSamples = 15600;
constexpr int kYamnetAudioSampleRate = 16000; constexpr int kYamnetAudioSampleRate = 16000;
@ -163,15 +161,9 @@ TEST_F(EmbedTest, SucceedsWithSameAudioAtDifferentSampleRates) {
audio_embedder->Embed(audio_buffer1, 16000)); audio_embedder->Embed(audio_buffer1, 16000));
MP_ASSERT_OK_AND_ASSIGN(auto result2, MP_ASSERT_OK_AND_ASSIGN(auto result2,
audio_embedder->Embed(audio_buffer2, 48000)); audio_embedder->Embed(audio_buffer2, 48000));
int expected_size = sizeof(kSpeechSimilarities) / sizeof(float); int expected_size = 5;
ASSERT_EQ(result1.size(), expected_size); ASSERT_EQ(result1.size(), expected_size);
ASSERT_EQ(result2.size(), expected_size); ASSERT_EQ(result2.size(), expected_size);
for (int i = 0; i < expected_size; ++i) {
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
result1[i].embeddings[0],
result2[i].embeddings[0]));
EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
}
MP_EXPECT_OK(audio_embedder->Close()); MP_EXPECT_OK(audio_embedder->Close());
} }
@ -192,10 +184,6 @@ TEST_F(EmbedTest, SucceedsWithDifferentAudios) {
audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate)); audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate));
ASSERT_EQ(result1.size(), 5); ASSERT_EQ(result1.size(), 5);
ASSERT_EQ(result2.size(), 1); ASSERT_EQ(result2.size(), 1);
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
result1[0].embeddings[0],
result2[0].embeddings[0]));
EXPECT_NEAR(similarity, 0.09017f, 1e-6);
MP_EXPECT_OK(audio_embedder->Close()); MP_EXPECT_OK(audio_embedder->Close());
} }
@ -258,15 +246,9 @@ TEST_F(EmbedAsyncTest, SucceedsWithSameAudioAtDifferentSampleRates) {
RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1); RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1);
std::vector<AudioEmbedderResult> result2; std::vector<AudioEmbedderResult> result2;
RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2); RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2);
int expected_size = sizeof(kSpeechSimilarities) / sizeof(float); int expected_size = 5;
ASSERT_EQ(result1.size(), expected_size); ASSERT_EQ(result1.size(), expected_size);
ASSERT_EQ(result2.size(), expected_size); ASSERT_EQ(result2.size(), expected_size);
for (int i = 0; i < expected_size; ++i) {
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
result1[i].embeddings[0],
result2[i].embeddings[0]));
EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
}
} }
TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) { TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
@ -276,10 +258,6 @@ TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2); RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2);
ASSERT_EQ(result1.size(), 5); ASSERT_EQ(result1.size(), 5);
ASSERT_EQ(result2.size(), 1); ASSERT_EQ(result2.size(), 1);
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
result1[0].embeddings[0],
result2[0].embeddings[0]));
EXPECT_NEAR(similarity, 0.09017f, 1e-6);
} }
} // namespace } // namespace

View File

@ -185,15 +185,15 @@ TEST_P(CalibrationWithoutIndicesTest, Succeeds) {
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
ScoreCalibrationCalculatorTest, CalibrationWithoutIndicesTest, ScoreCalibrationCalculatorTest, CalibrationWithoutIndicesTest,
Values(CalibrationTestParams{.score_transformation = "IDENTITY", Values(CalibrationTestParams{
.expected_results = {0.4948505976, /* score_transformation= */ "IDENTITY",
0.5059588508, 0.2, 0.2}}, /* expected_results= */ {0.4948505976, 0.5059588508, 0.2, 0.2}},
CalibrationTestParams{ CalibrationTestParams{
.score_transformation = "LOG", /* score_transformation= */ "LOG",
.expected_results = {0.2976901255, 0.3393665735, 0.2, 0.2}}, /* expected_results= */ {0.2976901255, 0.3393665735, 0.2, 0.2}},
CalibrationTestParams{ CalibrationTestParams{
.score_transformation = "INVERSE_LOGISTIC", /* score_transformation= */ "INVERSE_LOGISTIC",
.expected_results = {0.3203217641, 0.3778080605, 0.2, 0.2}}), /* expected_results= */ {0.3203217641, 0.3778080605, 0.2, 0.2}}),
[](const TestParamInfo<CalibrationWithoutIndicesTest::ParamType>& info) { [](const TestParamInfo<CalibrationWithoutIndicesTest::ParamType>& info) {
return info.param.score_transformation; return info.param.score_transformation;
}); });

View File

@ -17,6 +17,7 @@ limitations under the License.
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARK_H_ #define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARK_H_
#include <cstdlib> #include <cstdlib>
#include <optional>
#include <string> #include <string>
#include <vector> #include <vector>

View File

@ -332,9 +332,11 @@ cc_library(
"//mediapipe/tasks:internal", "//mediapipe/tasks:internal",
], ],
deps = [ deps = [
":external_file_handler",
"//mediapipe/calculators/core:flow_limiter_calculator_cc_proto", "//mediapipe/calculators/core:flow_limiter_calculator_cc_proto",
"//mediapipe/framework:calculator_cc_proto", "//mediapipe/framework:calculator_cc_proto",
"//mediapipe/framework/api2:builder", "//mediapipe/framework/api2:builder",
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
"//mediapipe/tasks/metadata:metadata_schema_cc", "//mediapipe/tasks/metadata:metadata_schema_cc",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
"@flatbuffers//:runtime_cc", "@flatbuffers//:runtime_cc",
@ -375,6 +377,5 @@ cc_test(
"//mediapipe/tasks/cc:common", "//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto", "//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
"//mediapipe/tasks/cc/metadata/utils:zip_utils", "//mediapipe/tasks/cc/metadata/utils:zip_utils",
"@org_tensorflow//tensorflow/lite/c:common",
], ],
) )

View File

@@ -29,7 +29,7 @@ limitations under the License.
#include <windows.h>
#else
#include <unistd.h>
-#endif
+#endif  // _WIN32
#include <memory>
#include <string>
@@ -102,9 +102,13 @@ absl::StatusOr<std::string> PathToResourceAsFile(std::string path) {
#else
if (absl::StartsWith(path, "./")) {
path = "mediapipe" + path.substr(1);
} else if (path[0] != '/') {
path = "mediapipe/" + path;
}
std::string error;
// TODO: We should ideally use `CreateForTests` when this is
// accessed from unit tests.
std::unique_ptr<::bazel::tools::cpp::runfiles::Runfiles> runfiles(
::bazel::tools::cpp::runfiles::Runfiles::Create("", &error));
if (!runfiles) {

View File

@@ -88,6 +88,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFile) {
.status());
}
#ifndef _WIN32
TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
const int model_file_descriptor = open(kTestModelBundlePath, O_RDONLY);
auto model_file = std::make_unique<proto::ExternalFile>();
@@ -103,6 +104,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
model_bundle_resources->GetModelFile("dummy_gesture_recognizer.tflite")
.status());
}
#endif // _WIN32
TEST(ModelAssetBundleResourcesTest, CreateFromFilePointer) {
auto file_content = LoadBinaryContent(kTestModelBundlePath);

View File

@@ -136,6 +136,7 @@ TEST_F(ModelResourcesTest, CreateFromFile) {
CheckModelResourcesPackets(model_resources.get());
}
#ifndef _WIN32
TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
const int model_file_descriptor = open(kTestModelPath, O_RDONLY);
auto model_file = std::make_unique<proto::ExternalFile>();
@@ -145,6 +146,7 @@ TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
ModelResources::Create(kTestModelResourcesTag, std::move(model_file)));
CheckModelResourcesPackets(model_resources.get());
}
#endif // _WIN32
TEST_F(ModelResourcesTest, CreateFromInvalidFile) {
auto model_file = std::make_unique<proto::ExternalFile>();
@@ -168,6 +170,15 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
auto status_or_model_resources =
ModelResources::Create(kTestModelResourcesTag, std::move(model_file));
#ifdef _WIN32
EXPECT_EQ(status_or_model_resources.status().code(),
absl::StatusCode::kFailedPrecondition);
EXPECT_THAT(
status_or_model_resources.status().message(),
testing::HasSubstr("File descriptors are not supported on Windows."));
AssertStatusHasMediaPipeTasksStatusCode(status_or_model_resources.status(),
MediaPipeTasksStatus::kFileReadError);
#else
EXPECT_EQ(status_or_model_resources.status().code(),
absl::StatusCode::kInvalidArgument);
EXPECT_THAT(
@@ -176,6 +187,7 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
AssertStatusHasMediaPipeTasksStatusCode(
status_or_model_resources.status(),
MediaPipeTasksStatus::kInvalidArgumentError);
#endif // _WIN32
}
TEST_F(ModelResourcesTest, CreateFailWithCorruptedFile) {

View File

@@ -23,6 +23,8 @@ limitations under the License.
#include "absl/strings/string_view.h"
#include "flatbuffers/flatbuffers.h"
#include "mediapipe/calculators/core/flow_limiter_calculator.pb.h"
#include "mediapipe/tasks/cc/core/external_file_handler.h"
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
namespace mediapipe {
namespace tasks {
@@ -34,13 +36,11 @@ constexpr char kFlowLimiterCalculatorName[] = "FlowLimiterCalculator";
} // namespace
std::string LoadBinaryContent(const char* filename) {
-  std::ifstream input_file(filename, std::ios::binary | std::ios::ate);
-  // Find buffer size from input file, and load the buffer.
-  size_t buffer_size = input_file.tellg();
-  std::string buffer(buffer_size, '\0');
-  input_file.seekg(0, std::ios::beg);
-  input_file.read(const_cast<char*>(buffer.c_str()), buffer_size);
-  return buffer;
+  proto::ExternalFile external_file;
+  external_file.set_file_name(filename);
+  auto file_handler =
+      ExternalFileHandler::CreateFromExternalFile(&external_file);
+  return std::string{(*file_handler)->GetFileContent()};
}
int FindTensorIndexByMetadataName(

View File

@@ -16,6 +16,7 @@ cc_test(
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:status",
"//mediapipe/tasks/cc:common",
"//mediapipe/tasks/cc/core:utils",
"//mediapipe/tasks/cc/metadata:metadata_extractor",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",

View File

@@ -25,12 +25,14 @@ limitations under the License.
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/core/utils.h"
namespace mediapipe {
namespace tasks {
namespace metadata {
namespace {
using core::LoadBinaryContent;
using ::testing::Optional;
constexpr char kTestDataDirectory[] = "mediapipe/tasks/testdata/metadata";
@@ -53,8 +55,8 @@ constexpr char kRandomTextFile[] = "external_file";
absl::StatusOr<std::unique_ptr<ModelMetadataExtractor>> CreateMetadataExtractor(
std::string model_name, std::string* file_contents) {
-  MP_RETURN_IF_ERROR(file::GetContents(
-      file::JoinPath("./", kTestDataDirectory, model_name), file_contents));
+  *file_contents = LoadBinaryContent(
+      file::JoinPath("./", kTestDataDirectory, model_name).c_str());
return ModelMetadataExtractor::CreateFromModelBuffer(file_contents->data(),
file_contents->length());
}

View File

@@ -26,7 +26,11 @@ using ::testing::MatchesRegex;
TEST(MetadataParserTest, MatadataParserVersionIsWellFormed) {
// Validates that the version is well-formed (x.y.z).
#ifdef _WIN32
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
#else
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
#endif // _WIN32
}
} // namespace

View File

@@ -83,7 +83,11 @@ TEST(MetadataVersionTest,
builder.GetSize(), &min_version),
kTfLiteOk);
// Validates that the version is well-formed (x.y.z).
#ifdef _WIN32
EXPECT_THAT(min_version, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
#else
EXPECT_THAT(min_version, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
#endif // _WIN32
}
TEST(MetadataVersionTest,

View File

@@ -0,0 +1,49 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//mediapipe/tasks:internal"])
mediapipe_proto_library(
name = "geometry_pipeline_calculator_proto",
srcs = ["geometry_pipeline_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
],
)
cc_library(
name = "geometry_pipeline_calculator",
srcs = ["geometry_pipeline_calculator.cc"],
deps = [
":geometry_pipeline_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/tasks/cc/vision/face_geometry/libs:geometry_pipeline",
"//mediapipe/tasks/cc/vision/face_geometry/libs:validation_utils",
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
"//mediapipe/util:resource_util",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)

View File

@@ -0,0 +1,194 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/util/resource_util.h"
namespace mediapipe::tasks::vision::face_geometry {
namespace {
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
static constexpr char kImageSizeTag[] = "IMAGE_SIZE";
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS";
using ::mediapipe::tasks::vision::face_geometry::proto::Environment;
using ::mediapipe::tasks::vision::face_geometry::proto::FaceGeometry;
using ::mediapipe::tasks::vision::face_geometry::proto::
GeometryPipelineMetadata;
// A calculator that estimates face geometry for multiple faces from their landmarks.
//
// Inputs:
// IMAGE_SIZE (`std::pair<int, int>`, required):
// The size of the current frame. The first element of the pair is the frame
// width; the other one is the frame height.
//
// The face landmarks should have been detected on a frame with the same
// aspect ratio. If used as-is, the resulting face geometry visualization is
// expected to be rendered on a frame with the same aspect ratio as well.
//
// MULTI_FACE_LANDMARKS (`std::vector<NormalizedLandmarkList>`, required):
// A vector of face landmark lists.
//
// Input side packets:
// ENVIRONMENT (`proto::Environment`, required)
// Describes an environment; includes the camera frame origin point location
// as well as virtual camera parameters.
//
// Output:
// MULTI_FACE_GEOMETRY (`std::vector<FaceGeometry>`, required):
// A vector of face geometry data.
//
// Options:
// metadata_path (`string`, optional):
// Defines a path for the geometry pipeline metadata file.
//
// The geometry pipeline metadata file format must be the binary
// `GeometryPipelineMetadata` proto.
//
class GeometryPipelineCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->InputSidePackets().Tag(kEnvironmentTag).Set<Environment>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
cc->Inputs()
.Tag(kMultiFaceLandmarksTag)
.Set<std::vector<mediapipe::NormalizedLandmarkList>>();
cc->Outputs().Tag(kMultiFaceGeometryTag).Set<std::vector<FaceGeometry>>();
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(mediapipe::TimestampDiff(0));
const auto& options = cc->Options<FaceGeometryPipelineCalculatorOptions>();
ASSIGN_OR_RETURN(
GeometryPipelineMetadata metadata,
ReadMetadataFromFile(options.metadata_path()),
_ << "Failed to read the geometry pipeline metadata from file!");
MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
<< "Invalid geometry pipeline metadata!";
const Environment& environment =
cc->InputSidePackets().Tag(kEnvironmentTag).Get<Environment>();
MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
<< "Invalid environment!";
ASSIGN_OR_RETURN(geometry_pipeline_,
CreateGeometryPipeline(environment, metadata),
_ << "Failed to create a geometry pipeline!");
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
// Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required
// to have a non-empty packet. In case this requirement is not met, there's
// nothing to be processed at the current timestamp.
if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() ||
cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& image_size =
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
const auto& multi_face_landmarks =
cc->Inputs()
.Tag(kMultiFaceLandmarksTag)
.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
auto multi_face_geometry = absl::make_unique<std::vector<FaceGeometry>>();
ASSIGN_OR_RETURN(
*multi_face_geometry,
geometry_pipeline_->EstimateFaceGeometry(
multi_face_landmarks, //
/*frame_width*/ image_size.first,
/*frame_height*/ image_size.second),
_ << "Failed to estimate face geometry for multiple faces!");
cc->Outputs()
.Tag(kMultiFaceGeometryTag)
.AddPacket(mediapipe::Adopt<std::vector<FaceGeometry>>(
multi_face_geometry.release())
.At(cc->InputTimestamp()));
return absl::OkStatus();
}
absl::Status Close(CalculatorContext* cc) override {
return absl::OkStatus();
}
private:
static absl::StatusOr<GeometryPipelineMetadata> ReadMetadataFromFile(
const std::string& metadata_path) {
ASSIGN_OR_RETURN(std::string metadata_blob,
ReadContentBlobFromFile(metadata_path),
_ << "Failed to read a metadata blob from file!");
GeometryPipelineMetadata metadata;
RET_CHECK(metadata.ParseFromString(metadata_blob))
<< "Failed to parse a metadata proto from a binary blob!";
return metadata;
}
static absl::StatusOr<std::string> ReadContentBlobFromFile(
const std::string& unresolved_path) {
ASSIGN_OR_RETURN(std::string resolved_path,
mediapipe::PathToResourceAsFile(unresolved_path),
_ << "Failed to resolve path! Path = " << unresolved_path);
std::string content_blob;
MP_RETURN_IF_ERROR(
mediapipe::GetResourceContents(resolved_path, &content_blob))
<< "Failed to read content blob! Resolved path = " << resolved_path;
return content_blob;
}
std::unique_ptr<GeometryPipeline> geometry_pipeline_;
};
} // namespace
using FaceGeometryPipelineCalculator = GeometryPipelineCalculator;
REGISTER_CALCULATOR(
::mediapipe::tasks::vision::face_geometry::FaceGeometryPipelineCalculator);
} // namespace mediapipe::tasks::vision::face_geometry
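For orientation, here is a minimal, hypothetical sketch of how this calculator could be wired into a graph using the stream tags and the `metadata_path` option documented above. The stream names, the metadata path, and the helper function name are illustrative only, and the calculator may need to be referenced by its fully qualified registered name depending on how registration is resolved:

```c++
// Illustrative sketch only: builds a CalculatorGraphConfig that runs the
// face geometry pipeline calculator on an image-size stream and a stream of
// per-face landmark lists.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig BuildFaceGeometryGraphConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "image_size"
    input_stream: "multi_face_landmarks"
    input_side_packet: "environment"
    output_stream: "multi_face_geometry"
    node {
      calculator: "FaceGeometryPipelineCalculator"
      input_side_packet: "ENVIRONMENT:environment"
      input_stream: "IMAGE_SIZE:image_size"
      input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
      output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
      options {
        [mediapipe.tasks.vision.face_geometry.FaceGeometryPipelineCalculatorOptions.ext] {
          # Hypothetical path; point this at a binary GeometryPipelineMetadata proto.
          metadata_path: "path/to/geometry_pipeline_metadata_landmarks.binarypb"
        }
      }
    }
  )pb");
}
```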

View File

@@ -0,0 +1,27 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.tasks.vision.face_geometry;
import "mediapipe/framework/calculator_options.proto";
message FaceGeometryPipelineCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional FaceGeometryPipelineCalculatorOptions ext = 512499200;
}
optional string metadata_path = 1;
}

View File

@@ -0,0 +1,59 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
encode_binary_proto(
name = "geometry_pipeline_metadata_detection",
input = "geometry_pipeline_metadata_detection.pbtxt",
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
output = "geometry_pipeline_metadata_detection.binarypb",
deps = [
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
],
)
encode_binary_proto(
name = "geometry_pipeline_metadata_landmarks",
input = "geometry_pipeline_metadata_landmarks.pbtxt",
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
output = "geometry_pipeline_metadata_landmarks.binarypb",
deps = [
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
],
)
# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from
# the `geometry_pipeline_metadata_landmarks.pbtxt` definition.
encode_binary_proto(
name = "geometry_pipeline_metadata",
input = "geometry_pipeline_metadata_landmarks.pbtxt",
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
output = "geometry_pipeline_metadata.binarypb",
deps = [
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
],
)
# These canonical face model files are not meant to be used in runtime, but rather for asset
# creation and/or reference.
exports_files([
"canonical_face_model.fbx",
"canonical_face_model.obj",
"canonical_face_model_uv_visualization.png",
])

File diff suppressed because it is too large

Binary file not shown.


View File

@@ -0,0 +1,78 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
input_source: FACE_DETECTION_PIPELINE
procrustes_landmark_basis { landmark_id: 0 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 1 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 2 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 3 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 4 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 5 weight: 1.0 }
# NOTE: the triangular topology of the face meshes is only useful when derived
# from the 468 face landmarks, not from the 6 face detection landmarks
# (keypoints). The latter don't cover the entire face, and this mesh is
# defined here only to comply with the API. It should be considered as
# a placeholder and/or for debugging purposes.
#
# Use the face geometry derived from the face detection landmarks
# (keypoints) for the face pose transformation matrix, not the mesh.
canonical_mesh: {
vertex_type: VERTEX_PT
primitive_type: TRIANGLE
vertex_buffer: -3.1511454582214355
vertex_buffer: 2.6246179342269897
vertex_buffer: 3.4656630754470825
vertex_buffer: 0.349575996398926
vertex_buffer: 0.38137748837470997
vertex_buffer: 3.1511454582214355
vertex_buffer: 2.6246179342269897
vertex_buffer: 3.4656630754470825
vertex_buffer: 0.650443494319916
vertex_buffer: 0.38137999176979054
vertex_buffer: 0.0
vertex_buffer: -1.126865029335022
vertex_buffer: 7.475604057312012
vertex_buffer: 0.500025987625122
vertex_buffer: 0.547487020492554
vertex_buffer: 0.0
vertex_buffer: -4.304508209228516
vertex_buffer: 4.162498950958252
vertex_buffer: 0.499989986419678
vertex_buffer: 0.694203019142151
vertex_buffer: -7.664182186126709
vertex_buffer: 0.673132002353668
vertex_buffer: -2.435867071151733
vertex_buffer: 0.007561000064015
vertex_buffer: 0.480777025222778
vertex_buffer: 7.664182186126709
vertex_buffer: 0.673132002353668
vertex_buffer: -2.435867071151733
vertex_buffer: 0.992439985275269
vertex_buffer: 0.480777025222778
index_buffer: 0
index_buffer: 1
index_buffer: 2
index_buffer: 1
index_buffer: 5
index_buffer: 2
index_buffer: 4
index_buffer: 0
index_buffer: 2
index_buffer: 4
index_buffer: 2
index_buffer: 3
index_buffer: 2
index_buffer: 5
index_buffer: 3
}

View File

@@ -0,0 +1,80 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "geometry_pipeline",
srcs = ["geometry_pipeline.cc"],
hdrs = ["geometry_pipeline.h"],
deps = [
":mesh_3d_utils",
":procrustes_solver",
":validation_utils",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:matrix_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
"@com_google_absl//absl/memory",
"@eigen_archive//:eigen3",
],
)
cc_library(
name = "mesh_3d_utils",
srcs = ["mesh_3d_utils.cc"],
hdrs = ["mesh_3d_utils.h"],
deps = [
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:statusor",
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
],
)
cc_library(
name = "procrustes_solver",
srcs = ["procrustes_solver.cc"],
hdrs = ["procrustes_solver.h"],
deps = [
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"@com_google_absl//absl/memory",
"@eigen_archive//:eigen3",
],
)
cc_library(
name = "validation_utils",
srcs = ["validation_utils.cc"],
hdrs = ["validation_utils.h"],
deps = [
":mesh_3d_utils",
"//mediapipe/framework/formats:matrix_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
],
)

View File

@@ -0,0 +1,471 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
#include <cmath>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "Eigen/Core"
#include "absl/memory/memory.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
namespace mediapipe::tasks::vision::face_geometry {
namespace {
struct PerspectiveCameraFrustum {
// NOTE: all arguments must be validated prior to calling this constructor.
PerspectiveCameraFrustum(const proto::PerspectiveCamera& perspective_camera,
int frame_width, int frame_height) {
static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;
const float height_at_near =
2.f * perspective_camera.near() *
std::tan(0.5f * kDegreesToRadians *
perspective_camera.vertical_fov_degrees());
const float width_at_near = frame_width * height_at_near / frame_height;
left = -0.5f * width_at_near;
right = 0.5f * width_at_near;
bottom = -0.5f * height_at_near;
top = 0.5f * height_at_near;
near = perspective_camera.near();
far = perspective_camera.far();
}
float left;
float right;
float bottom;
float top;
float near;
float far;
};
class ScreenToMetricSpaceConverter {
public:
ScreenToMetricSpaceConverter(
proto::OriginPointLocation origin_point_location, //
proto::InputSource input_source, //
Eigen::Matrix3Xf&& canonical_metric_landmarks, //
Eigen::VectorXf&& landmark_weights, //
std::unique_ptr<ProcrustesSolver> procrustes_solver)
: origin_point_location_(origin_point_location),
input_source_(input_source),
canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
landmark_weights_(std::move(landmark_weights)),
procrustes_solver_(std::move(procrustes_solver)) {}
// Converts `screen_landmark_list` into `metric_landmark_list` and estimates
// the `pose_transform_mat`.
//
// Here's the algorithm summary:
//
// (1) Project X- and Y- screen landmark coordinates at the Z near plane.
//
// (2) Estimate a canonical-to-runtime landmark set scale by running the
// Procrustes solver using the screen runtime landmarks.
//
// On this iteration, screen landmarks are used instead of unprojected
// metric landmarks as it is not safe to unproject due to the relative
// nature of the input screen landmark Z coordinate.
//
// (3) Use the canonical-to-runtime scale from (2) to unproject the screen
// landmarks. The result is referenced as "intermediate landmarks" because
// they are the first estimation of the resulting metric landmarks, but are
// not quite there yet.
//
// (4) Estimate a canonical-to-runtime landmark set scale by running the
// Procrustes solver using the intermediate runtime landmarks.
//
// (5) Use the product of the scale factors from (2) and (4) to unproject
// the screen landmarks the second time. This is the second and the final
// estimation of the metric landmarks.
//
// (6) Multiply each of the metric landmarks by the inverse pose
// transformation matrix to align the runtime metric face landmarks with
// the canonical metric face landmarks.
//
// Note: the input screen landmarks are in the left-handed coordinate system,
// however any metric landmarks - including the canonical metric
// landmarks, the final runtime metric landmarks and any intermediate
// runtime metric landmarks - are in the right-handed coordinate system.
//
// To keep the logic correct, the landmark set handedness is changed any
// time the screen-to-metric semantic barrier is passed.
absl::Status Convert(
const mediapipe::NormalizedLandmarkList& screen_landmark_list, //
const PerspectiveCameraFrustum& pcf, //
mediapipe::LandmarkList& metric_landmark_list, //
Eigen::Matrix4f& pose_transform_mat) const {
RET_CHECK_EQ(screen_landmark_list.landmark_size(),
canonical_metric_landmarks_.cols())
<< "The number of landmarks doesn't match the number passed upon "
"initialization!";
Eigen::Matrix3Xf screen_landmarks;
ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);
ProjectXY(pcf, screen_landmarks);
const float depth_offset = screen_landmarks.row(2).mean();
// 1st iteration: don't unproject XY because it's unsafe to do so due to
// the relative nature of the Z coordinate. Instead, run the
// first estimation on the projected XY and use that scale to
// unproject for the 2nd iteration.
Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
ChangeHandedness(intermediate_landmarks);
ASSIGN_OR_RETURN(const float first_iteration_scale,
EstimateScale(intermediate_landmarks),
_ << "Failed to estimate first iteration scale!");
// 2nd iteration: unproject XY using the scale from the 1st iteration.
intermediate_landmarks = screen_landmarks;
MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
intermediate_landmarks);
UnprojectXY(pcf, intermediate_landmarks);
ChangeHandedness(intermediate_landmarks);
// For face detection input landmarks, re-write Z-coord from the canonical
// landmarks.
if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
Eigen::Matrix4f intermediate_pose_transform_mat;
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, intermediate_landmarks,
landmark_weights_, intermediate_pose_transform_mat))
<< "Failed to estimate pose transform matrix!";
intermediate_landmarks.row(2) =
(intermediate_pose_transform_mat *
canonical_metric_landmarks_.colwise().homogeneous())
.row(2);
}
ASSIGN_OR_RETURN(const float second_iteration_scale,
EstimateScale(intermediate_landmarks),
_ << "Failed to estimate second iteration scale!");
// Use the total scale to unproject the screen landmarks.
const float total_scale = first_iteration_scale * second_iteration_scale;
MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
UnprojectXY(pcf, screen_landmarks);
ChangeHandedness(screen_landmarks);
// At this point, screen landmarks are converted into metric landmarks.
Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
pose_transform_mat))
<< "Failed to estimate pose transform matrix!";
// For face detection input landmarks, re-write Z-coord from the canonical
// landmarks and run the pose transform estimation again.
if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
metric_landmarks.row(2) =
(pose_transform_mat *
canonical_metric_landmarks_.colwise().homogeneous())
.row(2);
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
pose_transform_mat))
<< "Failed to estimate pose transform matrix!";
}
// Multiply each of the metric landmarks by the inverse pose
// transformation matrix to align the runtime metric face landmarks with
// the canonical metric face landmarks.
metric_landmarks = (pose_transform_mat.inverse() *
metric_landmarks.colwise().homogeneous())
.topRows(3);
ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);
return absl::OkStatus();
}
private:
void ProjectXY(const PerspectiveCameraFrustum& pcf,
Eigen::Matrix3Xf& landmarks) const {
float x_scale = pcf.right - pcf.left;
float y_scale = pcf.top - pcf.bottom;
float x_translation = pcf.left;
float y_translation = pcf.bottom;
if (origin_point_location_ == proto::OriginPointLocation::TOP_LEFT_CORNER) {
landmarks.row(1) = 1.f - landmarks.row(1).array();
}
landmarks =
landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
}
absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
Eigen::Matrix4f transform_mat;
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, landmarks, landmark_weights_,
transform_mat))
<< "Failed to estimate canonical-to-runtime landmark set transform!";
return transform_mat.col(0).norm();
}
static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
float depth_offset, float scale,
Eigen::Matrix3Xf& landmarks) {
landmarks.row(2) =
(landmarks.array().row(2) - depth_offset + pcf.near) / scale;
}
static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
Eigen::Matrix3Xf& landmarks) {
landmarks.row(0) =
landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
landmarks.row(1) =
landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
}
static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
landmarks.row(2) *= -1.f;
}
static void ConvertLandmarkListToEigenMatrix(
const mediapipe::NormalizedLandmarkList& landmark_list,
Eigen::Matrix3Xf& eigen_matrix) {
eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
for (int i = 0; i < landmark_list.landmark_size(); ++i) {
const auto& landmark = landmark_list.landmark(i);
eigen_matrix(0, i) = landmark.x();
eigen_matrix(1, i) = landmark.y();
eigen_matrix(2, i) = landmark.z();
}
}
static void ConvertEigenMatrixToLandmarkList(
const Eigen::Matrix3Xf& eigen_matrix,
mediapipe::LandmarkList& landmark_list) {
landmark_list.Clear();
for (int i = 0; i < eigen_matrix.cols(); ++i) {
auto& landmark = *landmark_list.add_landmark();
landmark.set_x(eigen_matrix(0, i));
landmark.set_y(eigen_matrix(1, i));
landmark.set_z(eigen_matrix(2, i));
}
}
const proto::OriginPointLocation origin_point_location_;
const proto::InputSource input_source_;
Eigen::Matrix3Xf canonical_metric_landmarks_;
Eigen::VectorXf landmark_weights_;
std::unique_ptr<ProcrustesSolver> procrustes_solver_;
};
class GeometryPipelineImpl : public GeometryPipeline {
public:
GeometryPipelineImpl(
const proto::PerspectiveCamera& perspective_camera, //
const proto::Mesh3d& canonical_mesh, //
uint32_t canonical_mesh_vertex_size, //
uint32_t canonical_mesh_num_vertices,
uint32_t canonical_mesh_vertex_position_offset,
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
: perspective_camera_(perspective_camera),
canonical_mesh_(canonical_mesh),
canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
canonical_mesh_vertex_position_offset_(
canonical_mesh_vertex_position_offset),
space_converter_(std::move(space_converter)) {}
absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
const std::vector<mediapipe::NormalizedLandmarkList>&
multi_face_landmarks,
int frame_width, int frame_height) const override {
MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
<< "Invalid frame dimensions!";
// Create a perspective camera frustum to be shared for geometry estimation
// per each face.
PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
frame_height);
std::vector<proto::FaceGeometry> multi_face_geometry;
// From this point, the meaning of "face landmarks" is clarified further as
// "screen face landmarks". This is done do distinguish from "metric face
// landmarks" that are derived during the face geometry estimation process.
for (const mediapipe::NormalizedLandmarkList& screen_face_landmarks :
multi_face_landmarks) {
// Having a too compact screen landmark list will result in numerical
// instabilities, therefore such faces are filtered.
if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
continue;
}
// Convert the screen landmarks into the metric landmarks and get the pose
// transformation matrix.
mediapipe::LandmarkList metric_face_landmarks;
Eigen::Matrix4f pose_transform_mat;
MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
metric_face_landmarks,
pose_transform_mat))
<< "Failed to convert landmarks from the screen to the metric space!";
// Pack geometry data for this face.
proto::FaceGeometry face_geometry;
proto::Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
// Copy the canonical face mesh as the face geometry mesh.
mutable_mesh->CopyFrom(canonical_mesh_);
// Replace XYZ vertex mesh coordinates with the metric landmark positions.
for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
canonical_mesh_vertex_position_offset_;
mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
metric_face_landmarks.landmark(i).x());
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
metric_face_landmarks.landmark(i).y());
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
metric_face_landmarks.landmark(i).z());
}
// Populate the face pose transformation matrix.
mediapipe::MatrixDataProtoFromMatrix(
pose_transform_mat, face_geometry.mutable_pose_transform_matrix());
multi_face_geometry.push_back(face_geometry);
}
return multi_face_geometry;
}
private:
static bool IsScreenLandmarkListTooCompact(
const mediapipe::NormalizedLandmarkList& screen_landmarks) {
float mean_x = 0.f;
float mean_y = 0.f;
for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
const auto& landmark = screen_landmarks.landmark(i);
mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
}
float max_sq_dist = 0.f;
for (const auto& landmark : screen_landmarks.landmark()) {
const float d_x = landmark.x() - mean_x;
const float d_y = landmark.y() - mean_y;
max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
}
static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
}
const proto::PerspectiveCamera perspective_camera_;
const proto::Mesh3d canonical_mesh_;
const uint32_t canonical_mesh_vertex_size_;
const uint32_t canonical_mesh_num_vertices_;
const uint32_t canonical_mesh_vertex_position_offset_;
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
};
} // namespace
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
const proto::Environment& environment,
const proto::GeometryPipelineMetadata& metadata) {
MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
<< "Invalid environment!";
MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
<< "Invalid geometry pipeline metadata!";
const auto& canonical_mesh = metadata.canonical_mesh();
RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
VertexComponent::POSITION))
<< "Canonical face mesh must have the `POSITION` vertex component!";
RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
VertexComponent::TEX_COORD))
<< "Canonical face mesh must have the `TEX_COORD` vertex component!";
uint32_t canonical_mesh_vertex_size =
GetVertexSize(canonical_mesh.vertex_type());
uint32_t canonical_mesh_num_vertices =
canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
uint32_t canonical_mesh_vertex_position_offset =
GetVertexComponentOffset(canonical_mesh.vertex_type(),
VertexComponent::POSITION)
.value();
// Put the Procrustes landmark basis into Eigen matrices for easier access.
Eigen::Matrix3Xf canonical_metric_landmarks =
Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
Eigen::VectorXf landmark_weights =
Eigen::VectorXf::Zero(canonical_mesh_num_vertices);
for (int i = 0; i < canonical_mesh_num_vertices; ++i) {
uint32_t vertex_buffer_offset =
canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;
canonical_metric_landmarks(0, i) =
canonical_mesh.vertex_buffer(vertex_buffer_offset);
canonical_metric_landmarks(1, i) =
canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
canonical_metric_landmarks(2, i) =
canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
}
for (const proto::WeightedLandmarkRef& wlr :
metadata.procrustes_landmark_basis()) {
uint32_t landmark_id = wlr.landmark_id();
landmark_weights(landmark_id) = wlr.weight();
}
std::unique_ptr<GeometryPipeline> result =
absl::make_unique<GeometryPipelineImpl>(
environment.perspective_camera(), canonical_mesh,
canonical_mesh_vertex_size, canonical_mesh_num_vertices,
canonical_mesh_vertex_position_offset,
absl::make_unique<ScreenToMetricSpaceConverter>(
environment.origin_point_location(),
metadata.input_source() == proto::InputSource::DEFAULT
? proto::InputSource::FACE_LANDMARK_PIPELINE
: metadata.input_source(),
std::move(canonical_metric_landmarks),
std::move(landmark_weights),
CreateFloatPrecisionProcrustesSolver()));
return result;
}
} // namespace mediapipe::tasks::vision::face_geometry
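As a reading aid, the per-landmark operations implemented above by `ProjectXY`, `MoveAndRescaleZ`, `UnprojectXY`, and `ChangeHandedness` can be summarized as follows, where $l, r, b, t, n$ are the left/right/bottom/top/near frustum planes, $\bar z$ is the mean projected depth, and $s$ is the Procrustes-estimated canonical-to-runtime scale:

```latex
\begin{aligned}
\textbf{ProjectXY:}\quad & x \leftarrow x\,(r-l)+l, \qquad
                           y \leftarrow y\,(t-b)+b, \qquad
                           z \leftarrow z\,(r-l) \\
                         & (\text{with } y \leftarrow 1-y \text{ applied first for a top-left origin}) \\
\textbf{MoveAndRescaleZ:}\quad & z \leftarrow \frac{z-\bar z+n}{s} \\
\textbf{UnprojectXY:}\quad & x \leftarrow \frac{x\,z}{n}, \qquad y \leftarrow \frac{y\,z}{n} \\
\textbf{ChangeHandedness:}\quad & z \leftarrow -z
\end{aligned}
```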

View File

@@ -0,0 +1,69 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
#include <memory>
#include <vector>
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
namespace mediapipe::tasks::vision::face_geometry {
// Encapsulates a stateless estimator of facial geometry in a Metric space based
// on the normalized face landmarks in the Screen space.
class GeometryPipeline {
public:
virtual ~GeometryPipeline() = default;
// Estimates geometry data for multiple faces.
//
// Returns an error status if any of the passed arguments is invalid.
//
// The result includes face geometry data for a subset of the input faces,
// however geometry data for some faces might be missing. This may happen if
// it'd be unstable to estimate the facial geometry based on a corresponding
// face landmark list for any reason (for example, if the landmark list is too
// compact).
//
// Each face landmark list must have the same number of landmarks as was
// passed upon initialization via the canonical face mesh (as a part of the
// geometry pipeline metadata).
//
// Both `frame_width` and `frame_height` must be positive.
virtual absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
const std::vector<mediapipe::NormalizedLandmarkList>&
multi_face_landmarks,
int frame_width, int frame_height) const = 0;
};
// Creates an instance of `GeometryPipeline`.
//
// Both `environment` and `metadata` must be valid (for details, please refer to
// the proto message definition comments and/or `validation_utils.h/cc`).
//
// Canonical face mesh (defined as a part of `metadata`) must have the
// `POSITION` and the `TEX_COORD` vertex components.
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
const proto::Environment& environment,
const proto::GeometryPipelineMetadata& metadata);
} // namespace mediapipe::tasks::vision::face_geometry
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
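A minimal usage sketch for the interface above, assuming the `Environment` and `GeometryPipelineMetadata` protos have already been loaded elsewhere; the helper name `EstimateFaceGeometryForFrame` is illustrative only:

```c++
// Illustrative sketch only: drives the GeometryPipeline declared above.
#include <memory>
#include <vector>

#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"

namespace face_geometry = ::mediapipe::tasks::vision::face_geometry;

absl::StatusOr<std::vector<face_geometry::proto::FaceGeometry>>
EstimateFaceGeometryForFrame(
    const face_geometry::proto::Environment& environment,
    const face_geometry::proto::GeometryPipelineMetadata& metadata,
    const std::vector<mediapipe::NormalizedLandmarkList>& multi_face_landmarks,
    int frame_width, int frame_height) {
  // The pipeline is stateless, so in a real client it would typically be
  // created once and reused across frames.
  ASSIGN_OR_RETURN(
      std::unique_ptr<face_geometry::GeometryPipeline> pipeline,
      face_geometry::CreateGeometryPipeline(environment, metadata));
  return pipeline->EstimateFaceGeometry(multi_face_landmarks, frame_width,
                                        frame_height);
}
```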

View File

@@ -0,0 +1,103 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
#include <cstdint>
#include <cstdlib>
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
namespace mediapipe::tasks::vision::face_geometry {
namespace {
bool HasVertexComponentVertexPT(VertexComponent vertex_component) {
switch (vertex_component) {
case VertexComponent::POSITION:
case VertexComponent::TEX_COORD:
return true;
default:
return false;
}
}
uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) {
switch (vertex_component) {
case VertexComponent::POSITION:
return 3;
case VertexComponent::TEX_COORD:
return 2;
}
}
uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) {
switch (vertex_component) {
case VertexComponent::POSITION:
return 0;
case VertexComponent::TEX_COORD:
return GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
}
}
} // namespace
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type) {
switch (vertex_type) {
case proto::Mesh3d::VERTEX_PT:
return GetVertexComponentSizeVertexPT(VertexComponent::POSITION) +
GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD);
}
}
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type) {
switch (primitive_type) {
case proto::Mesh3d::TRIANGLE:
return 3;
}
}
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
VertexComponent vertex_component) {
switch (vertex_type) {
case proto::Mesh3d::VERTEX_PT:
return HasVertexComponentVertexPT(vertex_component);
}
}
absl::StatusOr<uint32_t> GetVertexComponentOffset(
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
RET_CHECK(HasVertexComponentVertexPT(vertex_component))
<< "A given vertex type doesn't have the requested component!";
switch (vertex_type) {
case proto::Mesh3d::VERTEX_PT:
return GetVertexComponentOffsetVertexPT(vertex_component);
}
}
absl::StatusOr<uint32_t> GetVertexComponentSize(
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
RET_CHECK(HasVertexComponentVertexPT(vertex_component))
<< "A given vertex type doesn't have the requested component!";
switch (vertex_type) {
case proto::Mesh3d::VERTEX_PT:
return GetVertexComponentSizeVertexPT(vertex_component);
}
}
} // namespace mediapipe::tasks::vision::face_geometry

View File

@@ -0,0 +1,51 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
#include <cstdint>
#include <cstdlib>
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
namespace mediapipe::tasks::vision::face_geometry {
enum class VertexComponent { POSITION, TEX_COORD };
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type);
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type);
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
VertexComponent vertex_component);
// Computes the vertex component offset.
//
// Returns an error status if a given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentOffset(
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
// Computes the vertex component size.
//
// Returns an error status if a given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentSize(
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
} // namespace mediapipe::tasks::vision::face_geometry
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
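A small sketch of how these helpers combine to address the vertex buffer of a `VERTEX_PT` mesh, whose stride is 5 floats per vertex (3 for `POSITION`, 2 for `TEX_COORD`); the helper name `GetVertexPosition` is illustrative only:

```c++
// Illustrative sketch only: reads the XYZ position of vertex `i` from a mesh.
#include <array>
#include <cstdint>

#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"

namespace face_geometry = ::mediapipe::tasks::vision::face_geometry;

absl::StatusOr<std::array<float, 3>> GetVertexPosition(
    const face_geometry::proto::Mesh3d& mesh, int i) {
  // For VERTEX_PT: stride == 5 and the position component sits at offset 0.
  const int stride =
      static_cast<int>(face_geometry::GetVertexSize(mesh.vertex_type()));
  ASSIGN_OR_RETURN(uint32_t position_offset,
                   face_geometry::GetVertexComponentOffset(
                       mesh.vertex_type(),
                       face_geometry::VertexComponent::POSITION));
  const int base = stride * i + static_cast<int>(position_offset);
  return std::array<float, 3>{mesh.vertex_buffer(base),
                              mesh.vertex_buffer(base + 1),
                              mesh.vertex_buffer(base + 2)};
}
```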

View File

@@ -0,0 +1,264 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
#include <cmath>
#include <memory>
#include "Eigen/Dense"
#include "absl/memory/memory.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe::tasks::vision::face_geometry {
namespace {
class FloatPrecisionProcrustesSolver : public ProcrustesSolver {
public:
FloatPrecisionProcrustesSolver() = default;
absl::Status SolveWeightedOrthogonalProblem(
const Eigen::Matrix3Xf& source_points, //
const Eigen::Matrix3Xf& target_points, //
const Eigen::VectorXf& point_weights,
Eigen::Matrix4f& transform_mat) const override {
// Validate inputs.
MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points))
<< "Failed to validate weighted orthogonal problem input points!";
MP_RETURN_IF_ERROR(
ValidatePointWeights(source_points.cols(), point_weights))
<< "Failed to validate weighted orthogonal problem point weights!";
// Extract square root from the point weights.
Eigen::VectorXf sqrt_weights = ExtractSquareRoot(point_weights);
// Try to solve the WEOP problem.
MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem(
source_points, target_points, sqrt_weights, transform_mat))
<< "Failed to solve the WEOP problem!";
return absl::OkStatus();
}
private:
static constexpr float kAbsoluteErrorEps = 1e-9f;
static absl::Status ValidateInputPoints(
const Eigen::Matrix3Xf& source_points,
const Eigen::Matrix3Xf& target_points) {
RET_CHECK_GT(source_points.cols(), 0)
<< "The number of source points must be positive!";
RET_CHECK_EQ(source_points.cols(), target_points.cols())
<< "The number of source and target points must be equal!";
return absl::OkStatus();
}
static absl::Status ValidatePointWeights(
int num_points, const Eigen::VectorXf& point_weights) {
RET_CHECK_GT(point_weights.size(), 0)
<< "The number of point weights must be positive!";
RET_CHECK_EQ(point_weights.size(), num_points)
<< "The number of points and point weights must be equal!";
float total_weight = 0.f;
for (int i = 0; i < num_points; ++i) {
RET_CHECK_GE(point_weights(i), 0.f)
<< "Each point weight must be non-negative!";
total_weight += point_weights(i);
}
RET_CHECK_GT(total_weight, kAbsoluteErrorEps)
<< "The total point weight is too small!";
return absl::OkStatus();
}
static Eigen::VectorXf ExtractSquareRoot(
const Eigen::VectorXf& point_weights) {
Eigen::VectorXf sqrt_weights(point_weights);
for (int i = 0; i < sqrt_weights.size(); ++i) {
sqrt_weights(i) = std::sqrt(sqrt_weights(i));
}
return sqrt_weights;
}
// Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into
// a single 4x4 transformation matrix.
static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s,
const Eigen::Vector3f& t) {
Eigen::Matrix4f result = Eigen::Matrix4f::Identity();
result.leftCols(3).topRows(3) = r_and_s;
result.col(3).topRows(3) = t;
return result;
}
// The weighted problem is thoroughly addressed in Section 2.4 of:
// D. Akca, Generalized Procrustes analysis and its applications
// in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648
//
// Notable differences in the code presented here are:
//
// * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q.
// Our W_p is diagonal (equal to diag(sqrt_weights^2)),
// so we can just set Q = diag(sqrt_weights) instead.
//
// * In the paper, the problem is presented as
// (for W_k = I and W_p = transposed(Q) Q):
// || Q (c A T + j transposed(t) - B) || -> min.
//
// We reformulate it as an equivalent minimization of the transpose's
// norm:
// || (c transposed(T) transposed(A) - transposed(B)) transposed(Q) || -> min,
// where transposed(A) and transposed(B) are the source and the target point
// clouds, respectively, c transposed(T) is the rotation+scaling R sought
// for, and Q is diag(sqrt_weights).
//
// Most of the derivations are therefore transposed.
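//
// In equation form, with Q = diag(sqrt(w_1), ..., sqrt(w_n)) and the 3 x n
// source/target matrices S = transposed(A) and D = transposed(B), the code
// below minimizes, over the rotation-and-scale R = c transposed(T) and the
// translation t:
//
//   || (R S + t transposed(j) - D) Q ||  ->  min.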
//
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
// return type in order to avoid Eigen memory alignment issues. Details:
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
static absl::Status InternalSolveWeightedOrthogonalProblem(
const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets,
const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) {
// transposed(A_w).
Eigen::Matrix3Xf weighted_sources =
sources.array().rowwise() * sqrt_weights.array().transpose();
// transposed(B_w).
Eigen::Matrix3Xf weighted_targets =
targets.array().rowwise() * sqrt_weights.array().transpose();
// w = transposed(j_w) j_w.
float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum();
// Let C = (j_w transposed(j_w)) / (transposed(j_w) j_w).
// Note that C = transposed(C), hence (I - C) = transposed(I - C).
//
// transposed(A_w) C = transposed(A_w) j_w transposed(j_w) / w =
//   (transposed(A_w) j_w) transposed(j_w) / w = c_w transposed(j_w),
//
// where c_w = transposed(A_w) j_w / w is a k x 1 vector calculated here:
Eigen::Matrix3Xf twice_weighted_sources =
weighted_sources.array().rowwise() * sqrt_weights.array().transpose();
Eigen::Vector3f source_center_of_mass =
twice_weighted_sources.rowwise().sum() / total_weight;
// transposed((I - C) A_w) = transposed(A_w) (I - C) =
// transposed(A_w) - transposed(A_w) C = transposed(A_w) - c_w transposed(j_w).
Eigen::Matrix3Xf centered_weighted_sources =
weighted_sources - source_center_of_mass * sqrt_weights.transpose();
Eigen::Matrix3f rotation;
MP_RETURN_IF_ERROR(ComputeOptimalRotation(
weighted_targets * centered_weighted_sources.transpose(), rotation))
<< "Failed to compute the optimal rotation!";
ASSIGN_OR_RETURN(
float scale,
ComputeOptimalScale(centered_weighted_sources, weighted_sources,
weighted_targets, rotation),
_ << "Failed to compute the optimal scale!");
// R = c transposed(T).
Eigen::Matrix3f rotation_and_scale = scale * rotation;
// Compute optimal translation for the weighted problem.
// transposed(B_w - c A_w T) = transposed(B_w) - R transposed(A_w) in (54).
const auto pointwise_diffs =
weighted_targets - rotation_and_scale * weighted_sources;
// Multiplication by j_w is a respectively weighted column sum.
// (54) from the paper.
const auto weighted_pointwise_diffs =
pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose();
Eigen::Vector3f translation =
weighted_pointwise_diffs.rowwise().sum() / total_weight;
transform_mat = CombineTransformMatrix(rotation_and_scale, translation);
return absl::OkStatus();
}
// `design_matrix` is a transposed LHS of (51) in the paper.
//
// Note: the output `rotation` argument is used instead of `StatusOr<>`
// return type in order to avoid Eigen memory alignment issues. Details:
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
static absl::Status ComputeOptimalRotation(
const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) {
RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps)
<< "Design matrix norm is too small!";
Eigen::JacobiSVD<Eigen::Matrix3f> svd(
design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV);
Eigen::Matrix3f postrotation = svd.matrixU();
Eigen::Matrix3f prerotation = svd.matrixV().transpose();
// Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1),
// see "4.6 Constrained orthogonal Procrustes problems"
// in the Gower & Dijksterhuis's book "Procrustes Analysis".
// We flip the sign of the least singular value along with a column in W.
//
// Note that now the sum of singular values doesn't work for scale
// estimation due to this sign flip.
if (postrotation.determinant() * prerotation.determinant() <
static_cast<float>(0)) {
postrotation.col(2) *= static_cast<float>(-1);
}
// Transposed (52) from the paper.
rotation = postrotation * prerotation;
return absl::OkStatus();
}
static absl::StatusOr<float> ComputeOptimalScale(
const Eigen::Matrix3Xf& centered_weighted_sources,
const Eigen::Matrix3Xf& weighted_sources,
const Eigen::Matrix3Xf& weighted_targets,
const Eigen::Matrix3f& rotation) {
// transposed(T) transposed(A_w) (I - C).
const auto rotated_centered_weighted_sources =
rotation * centered_weighted_sources;
// Use the identity trace(A B) = sum(A * B^T)
// to avoid building large intermediate matrices (* is Hadamard product).
// (53) from the paper.
float numerator =
rotated_centered_weighted_sources.cwiseProduct(weighted_targets).sum();
float denominator =
centered_weighted_sources.cwiseProduct(weighted_sources).sum();
RET_CHECK_GT(denominator, kAbsoluteErrorEps)
<< "Scale expression denominator is too small!";
RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps)
<< "Scale is too small!";
return numerator / denominator;
}
};
} // namespace
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver() {
return absl::make_unique<FloatPrecisionProcrustesSolver>();
}
} // namespace mediapipe::tasks::vision::face_geometry
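
For readers following the comment block in `InternalSolveWeightedOrthogonalProblem` above, the weighted objective from Akca (2003) and the transposed form the solver actually minimizes can be restated compactly. This is only a restatement of the cited derivation, using the same symbols (A, B, T, c, t, j, Q) as the comments:

```latex
% WEOP objective as stated in the paper, with W_k = I and W_p = Q^T Q:
\min_{c,\,T,\,t}\;\bigl\| Q \,\bigl(c\,A\,T + j\,t^{\mathsf T} - B\bigr) \bigr\|_F^2
% Equivalent transposed form used in the code, where A^T and B^T are the
% 3 x N source and target clouds, R = c\,T^T is the rotation+scale sought,
% and Q = diag(sqrt_weights):
\min_{R,\,t}\;\bigl\| \bigl(R\,A^{\mathsf T} + t\,j^{\mathsf T} - B^{\mathsf T}\bigr)\, Q \bigr\|_F^2
```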

View File

@ -0,0 +1,70 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
#include <memory>
#include "Eigen/Dense"
#include "mediapipe/framework/port/status.h"
namespace mediapipe::tasks::vision::face_geometry {
// Encapsulates a stateless solver for the Weighted Extended Orthogonal
// Procrustes (WEOP) Problem, as defined in Section 2.4 of
// https://doi.org/10.3929/ethz-a-004656648.
//
// Given the source and the target point clouds, the algorithm estimates
// a 4x4 transformation matrix featuring the following semantic components:
//
// * Uniform scale
// * Rotation
// * Translation
//
// The matrix maps the source point cloud into the target point cloud minimizing
// the Mean Squared Error.
class ProcrustesSolver {
public:
virtual ~ProcrustesSolver() = default;
// Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem.
//
// All `source_points`, `target_points` and `point_weights` must define the
// same number of points. Elements of `point_weights` must be non-negative.
//
// Too small a diameter of either point cloud will likely lead to numerical
// instabilities and a failure to estimate the transformation.
//
// Too small a total point weight will likewise lead to numerical
// instabilities and a failure to estimate the transformation.
//
// Too small a point coordinate deviation in either point cloud will likely
// result in a failure, as it makes the solution (if one exists) very unstable.
//
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
// return type in order to avoid Eigen memory alignment issues. Details:
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
virtual absl::Status SolveWeightedOrthogonalProblem(
const Eigen::Matrix3Xf& source_points, //
const Eigen::Matrix3Xf& target_points, //
const Eigen::VectorXf& point_weights, //
Eigen::Matrix4f& transform_mat) const = 0;
};
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver();
} // namespace mediapipe::tasks::vision::face_geometry
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
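
A minimal usage sketch of this interface; the helper name and point values below are illustrative only and not part of the library:

```c++
#include <memory>

#include "Eigen/Dense"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"

namespace mediapipe::tasks::vision::face_geometry {

// Hypothetical helper: estimates the transform mapping a toy source cloud
// onto a uniformly scaled and translated copy of itself.
absl::Status AlignExampleClouds(Eigen::Matrix4f& transform) {
  std::unique_ptr<ProcrustesSolver> solver =
      CreateFloatPrecisionProcrustesSolver();

  // Columns are points; values are made up for illustration.
  Eigen::Matrix3Xf source(3, 4);
  source << 0.f, 1.f, 0.f, 0.f,
            0.f, 0.f, 1.f, 0.f,
            0.f, 0.f, 0.f, 1.f;
  // Target: the source scaled by 2 and translated by (1, 1, 0).
  Eigen::Matrix3Xf target =
      (2.f * source).colwise() + Eigen::Vector3f(1.f, 1.f, 0.f);
  Eigen::VectorXf weights = Eigen::VectorXf::Ones(source.cols());

  // On success, `transform` holds the 4x4 scale + rotation + translation.
  return solver->SolveWeightedOrthogonalProblem(source, target, weights,
                                                transform);
}

}  // namespace mediapipe::tasks::vision::face_geometry
```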

View File

@ -0,0 +1,127 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
#include <cstdint>
#include <cstdlib>
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
namespace mediapipe::tasks::vision::face_geometry {
absl::Status ValidatePerspectiveCamera(
const proto::PerspectiveCamera& perspective_camera) {
static constexpr float kAbsoluteErrorEps = 1e-9f;
RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps)
<< "Near Z must be greater than 0 with a margin of 10^{-9}!";
RET_CHECK_GT(perspective_camera.far(),
perspective_camera.near() + kAbsoluteErrorEps)
<< "Far Z must be greater than Near Z with a margin of 10^{-9}!";
RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps)
<< "Vertical FOV must be positive with a margin of 10^{-9}!";
RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps,
180.f)
<< "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}";
return absl::OkStatus();
}
absl::Status ValidateEnvironment(const proto::Environment& environment) {
MP_RETURN_IF_ERROR(
ValidatePerspectiveCamera(environment.perspective_camera()))
<< "Invalid perspective camera!";
return absl::OkStatus();
}
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d) {
const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type());
const std::size_t primitive_size = GetPrimitiveSize(mesh_3d.primitive_type());
RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % vertex_size, 0)
<< "Vertex buffer size must be a multiple of the vertex size!";
RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_size, 0)
<< "Index buffer size must be a multiple of the primitive size!";
const int num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
for (uint32_t idx : mesh_3d.index_buffer()) {
RET_CHECK_LT(idx, num_vertices)
<< "All mesh indices must refer to an existing vertex!";
}
return absl::OkStatus();
}
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry) {
MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";
static constexpr char kInvalid4x4MatrixMessage[] =
"Pose transformation matrix must be a 4x4 matrix!";
const mediapipe::MatrixData& pose_transform_matrix =
face_geometry.pose_transform_matrix();
RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
<< kInvalid4x4MatrixMessage;
return absl::OkStatus();
}
absl::Status ValidateGeometryPipelineMetadata(
const proto::GeometryPipelineMetadata& metadata) {
MP_RETURN_IF_ERROR(ValidateMesh3d(metadata.canonical_mesh()))
<< "Invalid canonical mesh!";
RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
<< "Procrustes landmark basis must be non-empty!";
const int num_vertices =
metadata.canonical_mesh().vertex_buffer_size() /
GetVertexSize(metadata.canonical_mesh().vertex_type());
for (const proto::WeightedLandmarkRef& wlr :
metadata.procrustes_landmark_basis()) {
RET_CHECK_LT(wlr.landmark_id(), num_vertices)
<< "All Procrustes basis indices must refer to an existing canonical "
"mesh vertex!";
RET_CHECK_GE(wlr.weight(), 0.f)
<< "All Procrustes basis landmarks must have a non-negative weight!";
}
return absl::OkStatus();
}
absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";
return absl::OkStatus();
}
} // namespace mediapipe::tasks::vision::face_geometry

View File

@ -0,0 +1,70 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
#include "mediapipe/framework/port/status.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
namespace mediapipe::tasks::vision::face_geometry {
// Validates `perspective_camera`.
//
// Near Z must be greater than 0 with a margin of `1e-9`.
// Far Z must be greater than Near Z with a margin of `1e-9`.
// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range
// edges.
absl::Status ValidatePerspectiveCamera(
const proto::PerspectiveCamera& perspective_camera);
// Validates `environment`.
//
// Environment's perspective camera must be valid.
absl::Status ValidateEnvironment(const proto::Environment& environment);
// Validates `mesh_3d`.
//
// Mesh vertex buffer size must be a multiple of the vertex size.
// Mesh index buffer size must be a multiple of the primitive size.
// All mesh indices must reference an existing mesh vertex.
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d);
// Validates `face_geometry`.
//
// Face mesh must be valid.
// Face pose transformation matrix must be a 4x4 matrix.
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry);
// Validates `metadata`.
//
// Canonical face mesh must be valid.
// Procrustes landmark basis must be non-empty.
// All Procrustes basis indices must reference an existing canonical mesh
// vertex.
// All Procrustes basis landmarks must have a non-negative weight.
absl::Status ValidateGeometryPipelineMetadata(
const proto::GeometryPipelineMetadata& metadata);
// Validates frame dimensions.
//
// Both frame width and frame height must be positive.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height);
} // namespace mediapipe::tasks::vision::face_geometry
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
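
A short sketch of exercising the validators above; the field values are illustrative, and the helper name is hypothetical:

```c++
#include "mediapipe/framework/port/status.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"

namespace mediapipe::tasks::vision::face_geometry {

absl::Status ValidateExampleEnvironment() {
  proto::Environment environment;
  environment.set_origin_point_location(proto::TOP_LEFT_CORNER);
  auto* camera = environment.mutable_perspective_camera();
  camera->set_vertical_fov_degrees(63.0f);
  camera->set_near(1.0f);     // Near Z must be > 0.
  camera->set_far(10000.0f);  // Far Z must exceed Near Z.
  return ValidateEnvironment(environment);
}

}  // namespace mediapipe::tasks::vision::face_geometry
```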

View File

@ -0,0 +1,46 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_proto_library(
name = "environment_proto",
srcs = ["environment.proto"],
)
mediapipe_proto_library(
name = "face_geometry_proto",
srcs = ["face_geometry.proto"],
deps = [
":mesh_3d_proto",
"//mediapipe/framework/formats:matrix_data_proto",
],
)
mediapipe_proto_library(
name = "geometry_pipeline_metadata_proto",
srcs = ["geometry_pipeline_metadata.proto"],
deps = [
":mesh_3d_proto",
],
)
mediapipe_proto_library(
name = "mesh_3d_proto",
srcs = ["mesh_3d.proto"],
)

View File

@ -0,0 +1,84 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.tasks.vision.face_geometry.proto;
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "EnvironmentProto";
// Defines the (0, 0) origin point location of the environment.
//
// The variation in the origin point location can be traced back to the memory
// layout of the camera video frame buffers.
//
// Usually, the memory layout for most CPU (and also some GPU) camera video
// frame buffers results in having the (0, 0) origin point located in the
// Top Left corner.
//
// On the contrary, the memory layout for most GPU camera video frame buffers
// results in having the (0, 0) origin point located in the Bottom Left corner.
//
// Let's consider the following example:
//
// (A) ---------------+
// ___ |
// | (1) | | |
// | / \ | | |
// | |---|===|-| |
// | |---| | | |
// | / \ | | |
// | | | | | |
// | | (2) |=| | |
// | | | | | |
// | |_______| |_| |
// | |@| |@| | | |
// | ___________|_|_ |
// |
// (B) ---------------+
//
// In this example, (1) and (2) have the same X coordinate regardless of the
// origin point location. However, having the origin point located at (A)
// (Top Left corner) results in (1) having a smaller Y coordinate if compared to
// (2). Similarly, having the origin point located at (B) (Bottom Left corner)
// results in (1) having a greater Y coordinate if compared to (2).
//
// Providing the correct origin point location for your environment and making
// sure all the input landmarks are in-sync with this location is crucial
// for receiving the correct output face geometry and visual renders.
enum OriginPointLocation {
BOTTOM_LEFT_CORNER = 1;
TOP_LEFT_CORNER = 2;
}
// The perspective camera is defined through its vertical FOV angle and the
// Z-clipping planes. The aspect ratio is a runtime variable for the face
// geometry module and should be provided alongside the face landmarks in order
// to estimate the face geometry on a given frame.
//
// More info on Perspective Cameras:
// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective
message PerspectiveCamera {
// `0 < vertical_fov_degrees < 180`.
optional float vertical_fov_degrees = 1;
// `0 < near < far`.
optional float near = 2;
optional float far = 3;
}
message Environment {
optional OriginPointLocation origin_point_location = 1;
optional PerspectiveCamera perspective_camera = 2;
}
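
To make the PerspectiveCamera semantics concrete, here is a sketch of the standard OpenGL-style projection matrix such a camera corresponds to. The aspect ratio is the runtime value mentioned above; this helper is illustrative and not part of this change:

```c++
#include <cmath>

#include "Eigen/Dense"

// Builds a column-major perspective projection matrix (OpenGL convention)
// from a vertical FOV in degrees, the near/far clipping planes and the frame
// aspect ratio (width / height).
Eigen::Matrix4f PerspectiveMatrix(float vertical_fov_degrees, float near_z,
                                  float far_z, float aspect_ratio) {
  constexpr float kPi = 3.14159265358979323846f;
  const float f = 1.f / std::tan(0.5f * vertical_fov_degrees * kPi / 180.f);
  Eigen::Matrix4f m = Eigen::Matrix4f::Zero();
  m(0, 0) = f / aspect_ratio;
  m(1, 1) = f;
  m(2, 2) = (far_z + near_z) / (near_z - far_z);
  m(2, 3) = 2.f * far_z * near_z / (near_z - far_z);
  m(3, 2) = -1.f;
  return m;
}
```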

View File

@ -0,0 +1,60 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.tasks.vision.face_geometry.proto;
import "mediapipe/framework/formats/matrix_data.proto";
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "FaceGeometryProto";
// Defines the face geometry pipeline estimation result format.
message FaceGeometry {
// Defines a mesh surface for a face. The face mesh vertex IDs are the same as
// the face landmark IDs.
//
// XYZ coordinates exist in the right-handed Metric 3D space configured by an
// environment. UV coordinates are taken from the canonical face mesh model.
//
// XY coordinates are guaranteed to match the screen positions of
// the input face landmarks after (1) being multiplied by the face pose
// transformation matrix and then (2) being projected with a perspective
// camera matrix of the same environment.
//
// NOTE: the triangular topology of the face mesh is only useful when derived
// from the 468 face landmarks, not from the 6 face detection landmarks
// (keypoints). The latter don't cover the entire face and this mesh is
// defined here only to comply with the API. It should be considered as
// a placeholder and/or for debugging purposes.
//
// Use the face geometry derived from the face detection landmarks
// (keypoints) for the face pose transformation matrix, not the mesh.
optional Mesh3d mesh = 1;
// Defines a face pose transformation matrix, which provides mapping from
// the static canonical face model to the runtime face. Tries to distinguish
// a head pose change from a facial expression change and to only reflect the
// former.
//
// Is a 4x4 matrix and contains only the following components:
// * Uniform scale
// * Rotation
// * Translation
//
// The last row is guaranteed to be `[0 0 0 1]`.
optional mediapipe.MatrixData pose_transform_matrix = 2;
}
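
A sketch of the two-step mapping referenced above for a single mesh vertex; `projection` could come from something like the `PerspectiveMatrix()` sketch shown earlier (both helpers are hypothetical):

```c++
#include "Eigen/Dense"

// (1) apply the face pose transformation matrix, then (2) project with the
// environment's perspective camera. The result is in normalized device
// coordinates, which map directly to screen positions.
Eigen::Vector2f ProjectVertex(const Eigen::Vector3f& vertex,
                              const Eigen::Matrix4f& pose_transform,
                              const Eigen::Matrix4f& projection) {
  const Eigen::Vector4f in_metric_space =
      pose_transform * vertex.homogeneous();                  // step (1)
  const Eigen::Vector4f clip = projection * in_metric_space;  // step (2)
  // Perspective divide; keep the XY screen coordinates.
  const Eigen::Vector3f ndc = clip.hnormalized();
  return ndc.head<2>();
}
```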

View File

@ -0,0 +1,63 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.tasks.vision.face_geometry.proto;
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "GeometryPipelineMetadataProto";
enum InputSource {
DEFAULT = 0; // FACE_LANDMARK_PIPELINE
FACE_LANDMARK_PIPELINE = 1;
FACE_DETECTION_PIPELINE = 2;
}
message WeightedLandmarkRef {
// Defines the landmark ID. References an existing face landmark ID.
optional uint32 landmark_id = 1;
// Defines the landmark weight. The larger the weight the more influence this
// landmark has in the basis.
//
// Is positive.
optional float weight = 2;
}
// Next field ID: 4
message GeometryPipelineMetadata {
// Defines the source of the input landmarks to let the underlying geometry
// pipeline adjust in order to produce the best results.
//
// Face landmark pipeline is expected to produce 3D landmarks with relative Z
// coordinate, which is scaled as the X coordinate assuming the weak
// perspective projection camera model.
//
// Face detection pipeline is expected to produce 2D landmarks with Z
// coordinate being equal to 0.
optional InputSource input_source = 3;
// Defines a mesh surface for a canonical face. The canonical face mesh vertex
// IDs are the same as the face landmark IDs.
//
// XYZ coordinates are defined in centimeter units.
optional Mesh3d canonical_mesh = 1;
// Defines a weighted landmark basis for running the Procrustes solver
// algorithm inside the geometry pipeline.
//
// A good basis sets face landmark weights in a way that distinguishes a head
// pose change from a facial expression change and only responds to the former.
repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
}
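
As a bridge to the Procrustes solver introduced earlier in this change, here is a hypothetical piece of glue code showing how the weighted landmark basis above might be turned into solver inputs. It assumes the VERTEX_PT layout (XYZUV, 5 floats per vertex) and runtime landmarks already expressed in the same metric space as the canonical mesh:

```c++
#include <vector>

#include "Eigen/Dense"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"

namespace mediapipe::tasks::vision::face_geometry {

// Gathers the basis landmarks from the canonical mesh and the runtime
// landmarks into the source/target/weight inputs expected by
// ProcrustesSolver::SolveWeightedOrthogonalProblem().
void BuildProcrustesInputs(const proto::GeometryPipelineMetadata& metadata,
                           const std::vector<Eigen::Vector3f>& runtime_points,
                           Eigen::Matrix3Xf& sources, Eigen::Matrix3Xf& targets,
                           Eigen::VectorXf& weights) {
  const auto& basis = metadata.procrustes_landmark_basis();
  sources.resize(3, basis.size());
  targets.resize(3, basis.size());
  weights.resize(basis.size());
  constexpr int kVertexSize = 5;  // XYZUV for VERTEX_PT.
  for (int i = 0; i < basis.size(); ++i) {
    const auto& ref = basis[i];
    const int offset = ref.landmark_id() * kVertexSize;
    sources.col(i) =
        Eigen::Vector3f(metadata.canonical_mesh().vertex_buffer(offset),
                        metadata.canonical_mesh().vertex_buffer(offset + 1),
                        metadata.canonical_mesh().vertex_buffer(offset + 2));
    // Assumes `runtime_points` is indexed by landmark ID.
    targets.col(i) = runtime_points[ref.landmark_id()];
    weights(i) = ref.weight();
  }
}

}  // namespace mediapipe::tasks::vision::face_geometry
```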

View File

@ -0,0 +1,41 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.tasks.vision.face_geometry.proto;
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "Mesh3dProto";
message Mesh3d {
enum VertexType {
// Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV).
VERTEX_PT = 0;
}
enum PrimitiveType {
// Is defined by 3 indices: triangle vertex IDs.
TRIANGLE = 0;
}
optional VertexType vertex_type = 1;
optional PrimitiveType primitive_type = 2;
// Vertex buffer size is a multiple of the vertex size (e.g., 5 for
// VERTEX_PT).
repeated float vertex_buffer = 3;
// Index buffer size is a multiple of the primitive size (e.g., 3 for
// TRIANGLE).
repeated uint32 index_buffer = 4;
}
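
Given the strides documented above, a mesh traversal looks like the following sketch (illustrative only):

```c++
#include <cstdint>

#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"

namespace mediapipe::tasks::vision::face_geometry {

// Walks a Mesh3d with VERTEX_PT vertices and TRIANGLE primitives:
// 5 floats (XYZUV) per vertex, 3 indices per triangle.
void VisitMesh(const proto::Mesh3d& mesh) {
  constexpr int kVertexSize = 5;
  constexpr int kPrimitiveSize = 3;
  const int num_vertices = mesh.vertex_buffer_size() / kVertexSize;
  for (int v = 0; v < num_vertices; ++v) {
    const float x = mesh.vertex_buffer(v * kVertexSize + 0);
    const float y = mesh.vertex_buffer(v * kVertexSize + 1);
    const float z = mesh.vertex_buffer(v * kVertexSize + 2);
    const float u = mesh.vertex_buffer(v * kVertexSize + 3);
    const float t = mesh.vertex_buffer(v * kVertexSize + 4);
    (void)x; (void)y; (void)z; (void)u; (void)t;  // ... use the vertex ...
  }
  const int num_triangles = mesh.index_buffer_size() / kPrimitiveSize;
  for (int i = 0; i < num_triangles; ++i) {
    const uint32_t a = mesh.index_buffer(i * kPrimitiveSize + 0);
    const uint32_t b = mesh.index_buffer(i * kPrimitiveSize + 1);
    const uint32_t c = mesh.index_buffer(i * kPrimitiveSize + 2);
    (void)a; (void)b; (void)c;  // ... use the triangle ...
  }
}

}  // namespace mediapipe::tasks::vision::face_geometry
```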

View File

@ -0,0 +1,108 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//mediapipe/tasks:internal"])
mediapipe_proto_library(
name = "tensors_to_image_calculator_proto",
srcs = ["tensors_to_image_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
"//mediapipe/gpu:gpu_origin_proto",
],
)
cc_library(
name = "tensors_to_image_calculator",
srcs = ["tensors_to_image_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
features = ["-layering_check"], # allow depending on tensor_to_image_calculator_gpu_deps
linkopts = select({
"//mediapipe:apple": [
"-framework CoreVideo",
"-framework MetalKit",
],
"//conditions:default": [],
}),
deps = [
":tensors_to_image_calculator_cc_proto",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/api2:builder",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:packet",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
"//mediapipe/gpu:gpu_origin_cc_proto",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": ["tensor_to_image_calculator_gpu_deps"],
}),
alwayslink = 1,
)
cc_library(
name = "tensor_to_image_calculator_gpu_deps",
visibility = ["//visibility:private"],
deps = select({
"//mediapipe:android": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_quad_renderer",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:gpu_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
],
"//mediapipe:ios": [
"//mediapipe/gpu:MPPMetalHelper",
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
],
"//mediapipe:macos": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_quad_renderer",
"//mediapipe/gpu:gpu_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
],
}),
)

View File

@ -0,0 +1,439 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/packet.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#if MEDIAPIPE_METAL_ENABLED
#import <CoreVideo/CoreVideo.h>
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>
#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
#import "mediapipe/gpu/MPPMetalHelper.h"
#else
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_quad_renderer.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_shader.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h"
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
namespace tasks {
namespace {
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Node;
using ::mediapipe::api2::Output;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
using ::tflite::gpu::gl::GlProgram;
using ::tflite::gpu::gl::GlShader;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
// Commonly used to compute the number of blocks to launch in a kernel.
static int NumGroups(const int size, const int group_size) { // NOLINT
return (size + group_size - 1) / group_size;
}
} // namespace
// Converts a MediaPipe tensor to a MediaPipe Image.
//
// Input streams:
// TENSORS - std::vector<mediapipe::Tensor> that only contains one element.
//
// Output streams:
// OUTPUT - mediapipe::Image.
//
// TODO: Enable TensorsToImageCalculator to run on CPU.
class TensorsToImageCalculator : public Node {
public:
static constexpr Input<std::vector<Tensor>> kInputTensors{"TENSORS"};
static constexpr Output<Image> kOutputImage{"IMAGE"};
MEDIAPIPE_NODE_CONTRACT(kInputTensors, kOutputImage);
static absl::Status UpdateContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc);
absl::Status Process(CalculatorContext* cc);
absl::Status Close(CalculatorContext* cc);
private:
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
bool metal_initialized_ = false;
MPPMetalHelper* gpu_helper_ = nullptr;
id<MTLComputePipelineState> to_buffer_program_;
absl::Status MetalSetup(CalculatorContext* cc);
absl::Status MetalProcess(CalculatorContext* cc);
#else
absl::Status GlSetup(CalculatorContext* cc);
GlCalculatorHelper gl_helper_;
bool gl_initialized_ = false;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
std::unique_ptr<tflite::gpu::gl::GlProgram> gl_compute_program_;
const tflite::gpu::uint3 workgroup_size_ = {8, 8, 1};
#else
GLuint program_ = 0;
std::unique_ptr<mediapipe::QuadRenderer> gl_renderer_;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
};
MEDIAPIPE_REGISTER_NODE(::mediapipe::tasks::TensorsToImageCalculator);
absl::Status TensorsToImageCalculator::UpdateContract(CalculatorContract* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#else
return GlCalculatorHelper::UpdateContract(cc);
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status TensorsToImageCalculator::Open(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(gpu_helper_);
#else
MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
return MetalProcess(cc);
#else
return gl_helper_.RunInGlContext([this, cc]() -> absl::Status {
if (!gl_initialized_) {
MP_RETURN_IF_ERROR(GlSetup(cc));
gl_initialized_ = true;
}
if (kInputTensors(cc).IsEmpty()) {
return absl::OkStatus();
}
const auto& input_tensors = kInputTensors(cc).Get();
RET_CHECK_EQ(input_tensors.size(), 1)
<< "Expect 1 input tensor, but have " << input_tensors.size();
const int tensor_width = input_tensors[0].shape().dims[2];
const int tensor_height = input_tensors[0].shape().dims[1];
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
auto out_texture = std::make_unique<tflite::gpu::gl::GlTexture>();
MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
tflite::gpu::DataType::UINT8, // GL_RGBA8
{tensor_width, tensor_height}, out_texture.get()));
const int output_index = 0;
glBindImageTexture(output_index, out_texture->id(), 0, GL_FALSE, 0,
GL_WRITE_ONLY, GL_RGBA8);
auto read_view = input_tensors[0].GetOpenGlBufferReadView();
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, read_view.name());
const tflite::gpu::uint3 workload = {tensor_width, tensor_height, 1};
const tflite::gpu::uint3 workgroups =
tflite::gpu::DivideRoundUp(workload, workgroup_size_);
glUseProgram(gl_compute_program_->id());
glUniform2i(glGetUniformLocation(gl_compute_program_->id(), "out_size"),
tensor_width, tensor_height);
MP_RETURN_IF_ERROR(gl_compute_program_->Dispatch(workgroups));
auto texture_buffer = mediapipe::GlTextureBuffer::Wrap(
out_texture->target(), out_texture->id(), tensor_width, tensor_height,
mediapipe::GpuBufferFormat::kBGRA32,
[ptr = out_texture.release()](
std::shared_ptr<mediapipe::GlSyncPoint> sync_token) mutable {
delete ptr;
});
auto output =
std::make_unique<mediapipe::GpuBuffer>(std::move(texture_buffer));
kOutputImage(cc).Send(Image(*output));
#else
if (!input_tensors[0].ready_as_opengl_texture_2d()) {
(void)input_tensors[0].GetCpuReadView();
}
auto output_texture =
gl_helper_.CreateDestinationTexture(tensor_width, tensor_height);
gl_helper_.BindFramebuffer(output_texture); // GL_TEXTURE0
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D,
input_tensors[0].GetOpenGlTexture2dReadView().name());
MP_RETURN_IF_ERROR(gl_renderer_->GlRender(
tensor_width, tensor_height, output_texture.width(),
output_texture.height(), mediapipe::FrameScaleMode::kStretch,
mediapipe::FrameRotation::kNone,
/*flip_horizontal=*/false, /*flip_vertical=*/false,
/*flip_texture=*/false));
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
auto output = output_texture.GetFrame<GpuBuffer>();
kOutputImage(cc).Send(Image(*output));
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
return mediapipe::OkStatus();
});
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status TensorsToImageCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
gl_helper_.RunInGlContext([this] {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
gl_compute_program_.reset();
#else
if (program_) glDeleteProgram(program_);
program_ = 0;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
});
#endif // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
return absl::OkStatus();
}
#if MEDIAPIPE_METAL_ENABLED
absl::Status TensorsToImageCalculator::MetalProcess(CalculatorContext* cc) {
if (!metal_initialized_) {
MP_RETURN_IF_ERROR(MetalSetup(cc));
metal_initialized_ = true;
}
if (kInputTensors(cc).IsEmpty()) {
return absl::OkStatus();
}
const auto& input_tensors = kInputTensors(cc).Get();
RET_CHECK_EQ(input_tensors.size(), 1)
<< "Expect 1 input tensor, but have " << input_tensors.size();
const int tensor_width = input_tensors[0].shape().dims[2];
const int tensor_height = input_tensors[0].shape().dims[1];
// TODO: Fix unused variable
[[maybe_unused]] id<MTLDevice> device = gpu_helper_.mtlDevice;
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TensorsToImageCalculatorConvert";
id<MTLComputeCommandEncoder> compute_encoder =
[command_buffer computeCommandEncoder];
[compute_encoder setComputePipelineState:to_buffer_program_];
auto input_view =
mediapipe::MtlBufferView::GetReadView(input_tensors[0], command_buffer);
[compute_encoder setBuffer:input_view.buffer() offset:0 atIndex:0];
mediapipe::GpuBuffer output =
[gpu_helper_ mediapipeGpuBufferWithWidth:tensor_width
height:tensor_height];
id<MTLTexture> dst_texture = [gpu_helper_ metalTextureWithGpuBuffer:output];
[compute_encoder setTexture:dst_texture atIndex:1];
MTLSize threads_per_group = MTLSizeMake(8, 8, 1);
MTLSize threadgroups =
MTLSizeMake(NumGroups(tensor_width, 8), NumGroups(tensor_height, 8), 1);
[compute_encoder dispatchThreadgroups:threadgroups
threadsPerThreadgroup:threads_per_group];
[compute_encoder endEncoding];
[command_buffer commit];
kOutputImage(cc).Send(Image(output));
return absl::OkStatus();
}
absl::Status TensorsToImageCalculator::MetalSetup(CalculatorContext* cc) {
id<MTLDevice> device = gpu_helper_.mtlDevice;
const std::string shader_source =
R"(
#include <metal_stdlib>
using namespace metal;
kernel void convertKernel(
device float* in_buf [[ buffer(0) ]],
texture2d<float, access::read_write> out_tex [[ texture(1) ]],
uint2 gid [[ thread_position_in_grid ]]) {
if (gid.x >= out_tex.get_width() || gid.y >= out_tex.get_height()) return;
uint linear_index = 3 * (gid.y * out_tex.get_width() + gid.x);
float4 out_value = float4(in_buf[linear_index], in_buf[linear_index + 1], in_buf[linear_index + 2], 1.0);
out_tex.write(out_value, gid);
}
)";
NSString* library_source =
[NSString stringWithUTF8String:shader_source.c_str()];
NSError* error = nil;
id<MTLLibrary> library =
[device newLibraryWithSource:library_source options:nullptr error:&error];
RET_CHECK(library != nil) << "Couldn't create shader library "
<< [[error localizedDescription] UTF8String];
id<MTLFunction> kernel_func = nil;
kernel_func = [library newFunctionWithName:@"convertKernel"];
RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";
to_buffer_program_ =
[device newComputePipelineStateWithFunction:kernel_func error:&error];
RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
[[error localizedDescription] UTF8String];
return mediapipe::OkStatus();
}
#endif // MEDIAPIPE_METAL_ENABLED
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
absl::Status TensorsToImageCalculator::GlSetup(CalculatorContext* cc) {
std::string maybe_flip_y_define;
#if !defined(__APPLE__)
const auto& options = cc->Options<TensorsToImageCalculatorOptions>();
if (options.gpu_origin() != mediapipe::GpuOrigin::TOP_LEFT) {
maybe_flip_y_define = R"(
#define FLIP_Y_COORD
)";
}
#endif // !defined(__APPLE__)
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
const std::string shader_header =
absl::StrCat(tflite::gpu::gl::GetShaderHeader(workgroup_size_), R"(
precision highp float;
layout(rgba8, binding = 0) writeonly uniform highp image2D output_texture;
uniform ivec2 out_size;
)");
const std::string shader_body = R"(
layout(std430, binding = 2) readonly buffer B0 {
float elements[];
} input_data; // data tensor
void main() {
int out_width = out_size.x;
int out_height = out_size.y;
ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
if (gid.x >= out_width || gid.y >= out_height) { return; }
int linear_index = 3 * (gid.y * out_width + gid.x);
#ifdef FLIP_Y_COORD
int y_coord = out_height - gid.y - 1;
#else
int y_coord = gid.y;
#endif // defined(FLIP_Y_COORD)
ivec2 out_coordinate = ivec2(gid.x, y_coord);
vec4 out_value = vec4(input_data.elements[linear_index], input_data.elements[linear_index + 1], input_data.elements[linear_index + 2], 1.0);
imageStore(output_texture, out_coordinate, out_value);
})";
const std::string shader_full =
absl::StrCat(shader_header, maybe_flip_y_define, shader_body);
GlShader shader;
MP_RETURN_IF_ERROR(
GlShader::CompileShader(GL_COMPUTE_SHADER, shader_full, &shader));
gl_compute_program_ = std::make_unique<GlProgram>();
MP_RETURN_IF_ERROR(
GlProgram::CreateWithShader(shader, gl_compute_program_.get()));
#else
constexpr GLchar kFragColorOutputDeclaration[] = R"(
#ifdef GL_ES
#define fragColor gl_FragColor
#else
out vec4 fragColor;
#endif // defined(GL_ES);
)";
constexpr GLchar kBody[] = R"(
DEFAULT_PRECISION(mediump, float)
in vec2 sample_coordinate;
uniform sampler2D tensor;
void main() {
#ifdef FLIP_Y_COORD
float y_coord = 1.0 - sample_coordinate.y;
#else
float y_coord = sample_coordinate.y;
#endif // defined(FLIP_Y_COORD)
vec3 color = texture2D(tensor, vec2(sample_coordinate.x, y_coord)).rgb;
fragColor = vec4(color, 1.0);
}
)";
const std::string src =
absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
kFragColorOutputDeclaration, maybe_flip_y_define, kBody);
gl_renderer_ = std::make_unique<mediapipe::QuadRenderer>();
MP_RETURN_IF_ERROR(gl_renderer_->GlSetup(src.c_str(), {"tensor"}));
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
return mediapipe::OkStatus();
}
#endif // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
} // namespace tasks
} // namespace mediapipe

View File

@ -0,0 +1,31 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.tasks;
import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";
message TensorsToImageCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional TensorsToImageCalculatorOptions ext = 511831156;
}
// For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
// to be flipped vertically as tensors are expected to start at top.
// (DEFAULT or unset interpreted as CONVENTIONAL.)
optional mediapipe.GpuOrigin.Mode gpu_origin = 1;
}
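
A sketch of wiring this calculator with the api2 graph builder. The upstream `stylized_tensors` stream and the surrounding graph are placeholders; only the TENSORS/IMAGE ports and the `gpu_origin` option come from the code above, and the registered node name is assumed from the MEDIAPIPE_REGISTER_NODE call:

```c++
#include <vector>

#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"

using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Stream;

mediapipe::CalculatorGraphConfig BuildGraph() {
  Graph graph;

  // Placeholder input: a single-element tensor vector from an upstream node.
  Stream<std::vector<mediapipe::Tensor>> stylized_tensors =
      graph.In(0).Cast<std::vector<mediapipe::Tensor>>();

  auto& tensors_to_image =
      graph.AddNode("mediapipe.tasks.TensorsToImageCalculator");
  tensors_to_image
      .GetOptions<mediapipe::tasks::TensorsToImageCalculatorOptions>()
      .set_gpu_origin(mediapipe::GpuOrigin::TOP_LEFT);
  stylized_tensors.ConnectTo(tensors_to_image.In("TENSORS"));

  Stream<mediapipe::Image> image =
      tensors_to_image.Out("IMAGE").Cast<mediapipe::Image>();
  image.ConnectTo(graph.Out(0));

  return graph.GetConfig();
}
```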

View File

@ -203,106 +203,111 @@ INSTANTIATE_TEST_CASE_P(
CombinedPredictionCalculatorTests, CombinedPredictionCalculatorTest, CombinedPredictionCalculatorTests, CombinedPredictionCalculatorTest,
testing::ValuesIn<CombinedPredictionCalculatorTestCase>({ testing::ValuesIn<CombinedPredictionCalculatorTestCase>({
{ {
.test_name = "TestCustomDramaWinnnerWith_HighCanned_Thresh", /* test_name= */ "TestCustomDramaWinnnerWith_HighCanned_Thresh",
.custom_negative_score = 0.1, /* custom_negative_score= */ 0.1,
.drama_score = 0.5, /* drama_score= */ 0.5,
.llama_score = 0.3, /* llama_score= */ 0.3,
.drama_thresh = 0.25, /* drama_thresh= */ 0.25,
.llama_thresh = 0.7, /* llama_thresh= */ 0.7,
.canned_negative_score = 0.1, /* canned_negative_score= */ 0.1,
.bazinga_score = 0.3, /* bazinga_score= */ 0.3,
.joy_score = 0.3, /* joy_score= */ 0.3,
.peace_score = 0.3, /* peace_score= */ 0.3,
.bazinga_thresh = 0.7, /* bazinga_thresh= */ 0.7,
.joy_thresh = 0.7, /* joy_thresh= */ 0.7,
.peace_thresh = 0.7, /* peace_thresh= */ 0.7,
.max_scoring_label = "CustomDrama", /* max_scoring_label= */ "CustomDrama",
.max_score = 0.5, /* max_score= */ 0.5,
}, },
{ {
.test_name = "TestCannedWinnerWith_HighCustom_ZeroCanned_Thresh", /* test_name= */ "TestCannedWinnerWith_HighCustom_ZeroCanned_"
.custom_negative_score = 0.1, "Thresh",
.drama_score = 0.3, /* custom_negative_score= */ 0.1,
.llama_score = 0.6, /* drama_score= */ 0.3,
.drama_thresh = 0.4, /* llama_score= */ 0.6,
.llama_thresh = 0.8, /* drama_thresh= */ 0.4,
.canned_negative_score = 0.1, /* llama_thresh= */ 0.8,
.bazinga_score = 0.4, /* canned_negative_score= */ 0.1,
.joy_score = 0.3, /* bazinga_score= */ 0.4,
.peace_score = 0.2, /* joy_score= */ 0.3,
.bazinga_thresh = 0.0, /* peace_score= */ 0.2,
.joy_thresh = 0.0, /* bazinga_thresh= */ 0.0,
.peace_thresh = 0.0, /* joy_thresh= */ 0.0,
.max_scoring_label = "CannedBazinga", /* peace_thresh= */ 0.0,
.max_score = 0.4, /* max_scoring_label= */ "CannedBazinga",
/* max_score= */ 0.4,
}, },
{ {
.test_name = "TestNegativeWinnerWith_LowCustom_HighCanned_Thresh", /* test_name= */ "TestNegativeWinnerWith_LowCustom_HighCanned_"
.custom_negative_score = 0.5, "Thresh",
.drama_score = 0.1, /* custom_negative_score= */ 0.5,
.llama_score = 0.4, /* drama_score= */ 0.1,
.drama_thresh = 0.1, /* llama_score= */ 0.4,
.llama_thresh = 0.05, /* drama_thresh= */ 0.1,
.canned_negative_score = 0.1, /* llama_thresh= */ 0.05,
.bazinga_score = 0.3, /* canned_negative_score= */ 0.1,
.joy_score = 0.3, /* bazinga_score= */ 0.3,
.peace_score = 0.3, /* joy_score= */ 0.3,
.bazinga_thresh = 0.7, /* peace_score= */ 0.3,
.joy_thresh = 0.7, /* bazinga_thresh= */ 0.7,
.peace_thresh = 0.7, /* joy_thresh= */ 0.7,
.max_scoring_label = "Negative", /* peace_thresh= */ 0.7,
.max_score = 0.5, /* max_scoring_label= */ "Negative",
/* max_score= */ 0.5,
}, },
{ {
.test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh", /* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
.custom_negative_score = 0.8, "Thresh",
.drama_score = 0.1, /* custom_negative_score= */ 0.8,
.llama_score = 0.1, /* drama_score= */ 0.1,
.drama_thresh = 0.25, /* llama_score= */ 0.1,
.llama_thresh = 0.7, /* drama_thresh= */ 0.25,
.canned_negative_score = 0.1, /* llama_thresh= */ 0.7,
.bazinga_score = 0.3, /* canned_negative_score= */ 0.1,
.joy_score = 0.3, /* bazinga_score= */ 0.3,
.peace_score = 0.3, /* joy_score= */ 0.3,
.bazinga_thresh = 0.7, /* peace_score= */ 0.3,
.joy_thresh = 0.7, /* bazinga_thresh= */ 0.7,
.peace_thresh = 0.7, /* joy_thresh= */ 0.7,
.max_scoring_label = "Negative", /* peace_thresh= */ 0.7,
.max_score = 0.8, /* max_scoring_label= */ "Negative",
/* max_score= */ 0.8,
}, },
{ {
.test_name = "TestNegativeWinnerWith_HighCustom_HighCannedThresh2", /* test_name= */ "TestNegativeWinnerWith_HighCustom_"
.custom_negative_score = 0.1, "HighCannedThresh2",
.drama_score = 0.2, /* custom_negative_score= */ 0.1,
.llama_score = 0.7, /* drama_score= */ 0.2,
.drama_thresh = 1.1, /* llama_score= */ 0.7,
.llama_thresh = 1.1, /* drama_thresh= */ 1.1,
.canned_negative_score = 0.1, /* llama_thresh= */ 1.1,
.bazinga_score = 0.3, /* canned_negative_score= */ 0.1,
.joy_score = 0.3, /* bazinga_score= */ 0.3,
.peace_score = 0.3, /* joy_score= */ 0.3,
.bazinga_thresh = 0.7, /* peace_score= */ 0.3,
.joy_thresh = 0.7, /* bazinga_thresh= */ 0.7,
.peace_thresh = 0.7, /* joy_thresh= */ 0.7,
.max_scoring_label = "Negative", /* peace_thresh= */ 0.7,
.max_score = 0.1, /* max_scoring_label= */ "Negative",
/* max_score= */ 0.1,
}, },
{ {
.test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh3", /* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
.custom_negative_score = 0.1, "Thresh3",
.drama_score = 0.3, /* custom_negative_score= */ 0.1,
.llama_score = 0.6, /* drama_score= */ 0.3,
.drama_thresh = 0.4, /* llama_score= */ 0.6,
.llama_thresh = 0.8, /* drama_thresh= */ 0.4,
.canned_negative_score = 0.3, /* llama_thresh= */ 0.8,
.bazinga_score = 0.2, /* canned_negative_score= */ 0.3,
.joy_score = 0.3, /* bazinga_score= */ 0.2,
.peace_score = 0.2, /* joy_score= */ 0.3,
.bazinga_thresh = 0.5, /* peace_score= */ 0.2,
.joy_thresh = 0.5, /* bazinga_thresh= */ 0.5,
.peace_thresh = 0.5, /* joy_thresh= */ 0.5,
.max_scoring_label = "Negative", /* peace_thresh= */ 0.5,
.max_score = 0.1, /* max_scoring_label= */ "Negative",
/* max_score= */ 0.1,
}, },
}), }),
[](const testing::TestParamInfo< [](const testing::TestParamInfo<

View File

@ -117,24 +117,24 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
LandmarksToMatrixCalculatorTests, Landmarks2dToMatrixCalculatorTest, LandmarksToMatrixCalculatorTests, Landmarks2dToMatrixCalculatorTest,
testing::ValuesIn<Landmarks2dToMatrixCalculatorTestCase>( testing::ValuesIn<Landmarks2dToMatrixCalculatorTestCase>(
{{.test_name = "TestWithOffset0", {{/* test_name= */ "TestWithOffset0",
.base_offset = 0, /* base_offset= */ 0,
.object_normalization_origin_offset = 0, /* object_normalization_origin_offset= */ 0,
.expected_cell_0_2 = 0.1f, /* expected_cell_0_2= */ 0.1f,
.expected_cell_1_5 = 0.1875f, /* expected_cell_1_5= */ 0.1875f,
.rotation = 0}, /* rotation= */ 0},
{.test_name = "TestWithOffset21", {/* test_name= */ "TestWithOffset21",
.base_offset = 21, /* base_offset= */ 21,
.object_normalization_origin_offset = 0, /* object_normalization_origin_offset= */ 0,
.expected_cell_0_2 = 0.1f, /* expected_cell_0_2= */ 0.1f,
.expected_cell_1_5 = 0.1875f, /* expected_cell_1_5= */ 0.1875f,
.rotation = 0}, /* rotation= */ 0},
{.test_name = "TestWithRotation", {/* test_name= */ "TestWithRotation",
.base_offset = 0, /* base_offset= */ 0,
.object_normalization_origin_offset = 0, /* object_normalization_origin_offset= */ 0,
.expected_cell_0_2 = 0.075f, /* expected_cell_0_2= */ 0.075f,
.expected_cell_1_5 = -0.25f, /* expected_cell_1_5= */ -0.25f,
.rotation = M_PI / 2.0}}), /* rotation= */ M_PI / 2.0}}),
[](const testing::TestParamInfo< [](const testing::TestParamInfo<
Landmarks2dToMatrixCalculatorTest::ParamType>& info) { Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
return info.param.test_name; return info.param.test_name;
@ -203,30 +203,30 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
LandmarksToMatrixCalculatorTests, LandmarksWorld3dToMatrixCalculatorTest, LandmarksToMatrixCalculatorTests, LandmarksWorld3dToMatrixCalculatorTest,
testing::ValuesIn<LandmarksWorld3dToMatrixCalculatorTestCase>( testing::ValuesIn<LandmarksWorld3dToMatrixCalculatorTestCase>(
{{.test_name = "TestWithOffset0", {{/* test_name= */ "TestWithOffset0",
.base_offset = 0, /* base_offset= */ 0,
.object_normalization_origin_offset = 0, /* object_normalization_origin_offset= */ 0,
.expected_cell_0_2 = 0.1f, /* expected_cell_0_2= */ 0.1f,
.expected_cell_1_5 = 0.25, /* expected_cell_1_5= */ 0.25,
.rotation = 0}, /* rotation= */ 0},
{.test_name = "TestWithOffset21", {/* test_name= */ "TestWithOffset21",
.base_offset = 21, /* base_offset= */ 21,
.object_normalization_origin_offset = 0, /* object_normalization_origin_offset= */ 0,
.expected_cell_0_2 = 0.1f, /* expected_cell_0_2= */ 0.1f,
.expected_cell_1_5 = 0.25, /* expected_cell_1_5= */ 0.25,
.rotation = 0}, /* rotation= */ 0},
{.test_name = "NoObjectNormalization", {/* test_name= */ "NoObjectNormalization",
.base_offset = 0, /* base_offset= */ 0,
.object_normalization_origin_offset = -1, /* object_normalization_origin_offset= */ -1,
.expected_cell_0_2 = 0.021f, /* expected_cell_0_2= */ 0.021f,
.expected_cell_1_5 = 0.052f, /* expected_cell_1_5= */ 0.052f,
.rotation = 0}, /* rotation= */ 0},
{.test_name = "TestWithRotation", {/* test_name= */ "TestWithRotation",
.base_offset = 0, /* base_offset= */ 0,
.object_normalization_origin_offset = 0, /* object_normalization_origin_offset= */ 0,
.expected_cell_0_2 = 0.1f, /* expected_cell_0_2= */ 0.1f,
.expected_cell_1_5 = -0.25f, /* expected_cell_1_5= */ -0.25f,
.rotation = M_PI / 2.0}}), /* rotation= */ M_PI / 2.0}}),
[](const testing::TestParamInfo< [](const testing::TestParamInfo<
LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) { LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
return info.param.test_name; return info.param.test_name;

View File

@ -257,19 +257,28 @@ class HandDetectorGraph : public core::ModelTaskGraph {
 preprocessed_tensors >> inference.In("TENSORS");
 auto model_output_tensors = inference.Out("TENSORS");

+// TODO: support hand detection metadata.
+bool has_metadata = false;
+
 // Generates a single side packet containing a vector of SSD anchors.
 auto& ssd_anchor = graph.AddNode("SsdAnchorsCalculator");
-ConfigureSsdAnchorsCalculator(
-    &ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>());
+auto& ssd_anchor_options =
+    ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>();
+if (!has_metadata) {
+  ConfigureSsdAnchorsCalculator(&ssd_anchor_options);
+}
 auto anchors = ssd_anchor.SideOut("");

 // Converts output tensors to Detections.
 auto& tensors_to_detections =
     graph.AddNode("TensorsToDetectionsCalculator");
-ConfigureTensorsToDetectionsCalculator(
-    subgraph_options,
-    &tensors_to_detections
-        .GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
+if (!has_metadata) {
+  ConfigureTensorsToDetectionsCalculator(
+      subgraph_options,
+      &tensors_to_detections
+          .GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
+}
 model_output_tensors >> tensors_to_detections.In("TENSORS");
 anchors >> tensors_to_detections.SideIn("ANCHORS");
 auto detections = tensors_to_detections.Out("DETECTIONS");

View File

@ -148,6 +148,7 @@ cc_library(
     "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
     "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
     "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
+    "//mediapipe/util:graph_builder_utils",
 ],
 alwayslink = 1,
)

View File

@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include <memory>
+#include <optional>
 #include <type_traits>
 #include <utility>
 #include <vector>
@ -41,6 +42,7 @@ limitations under the License.
 #include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
 #include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
 #include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
+#include "mediapipe/util/graph_builder_utils.h"
 namespace mediapipe {
 namespace tasks {
@ -53,7 +55,7 @@ using ::mediapipe::NormalizedRect;
 using ::mediapipe::api2::Input;
 using ::mediapipe::api2::Output;
 using ::mediapipe::api2::builder::Graph;
-using ::mediapipe::api2::builder::Source;
+using ::mediapipe::api2::builder::Stream;
 using ::mediapipe::tasks::components::utils::DisallowIf;
 using ::mediapipe::tasks::core::ModelAssetBundleResources;
 using ::mediapipe::tasks::metadata::SetExternalFile;
@ -78,40 +80,46 @@ constexpr char kHandLandmarksDetectorTFLiteName[] =
     "hand_landmarks_detector.tflite";
 struct HandLandmarkerOutputs {
-  Source<std::vector<NormalizedLandmarkList>> landmark_lists;
-  Source<std::vector<LandmarkList>> world_landmark_lists;
-  Source<std::vector<NormalizedRect>> hand_rects_next_frame;
-  Source<std::vector<ClassificationList>> handednesses;
-  Source<std::vector<NormalizedRect>> palm_rects;
-  Source<std::vector<Detection>> palm_detections;
-  Source<Image> image;
+  Stream<std::vector<NormalizedLandmarkList>> landmark_lists;
+  Stream<std::vector<LandmarkList>> world_landmark_lists;
+  Stream<std::vector<NormalizedRect>> hand_rects_next_frame;
+  Stream<std::vector<ClassificationList>> handednesses;
+  Stream<std::vector<NormalizedRect>> palm_rects;
+  Stream<std::vector<Detection>> palm_detections;
+  Stream<Image> image;
 };
 // Sets the base options in the sub tasks.
 absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
                                    HandLandmarkerGraphOptions* options,
                                    bool is_copy) {
-  ASSIGN_OR_RETURN(const auto hand_detector_file,
-                   resources.GetModelFile(kHandDetectorTFLiteName));
   auto* hand_detector_graph_options =
       options->mutable_hand_detector_graph_options();
-  SetExternalFile(hand_detector_file,
-                  hand_detector_graph_options->mutable_base_options()
-                      ->mutable_model_asset(),
-                  is_copy);
+  if (!hand_detector_graph_options->base_options().has_model_asset()) {
+    ASSIGN_OR_RETURN(const auto hand_detector_file,
+                     resources.GetModelFile(kHandDetectorTFLiteName));
+    SetExternalFile(hand_detector_file,
+                    hand_detector_graph_options->mutable_base_options()
+                        ->mutable_model_asset(),
+                    is_copy);
+  }
   hand_detector_graph_options->mutable_base_options()
       ->mutable_acceleration()
       ->CopyFrom(options->base_options().acceleration());
   hand_detector_graph_options->mutable_base_options()->set_use_stream_mode(
       options->base_options().use_stream_mode());
-  ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
-                   resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
   auto* hand_landmarks_detector_graph_options =
       options->mutable_hand_landmarks_detector_graph_options();
SetExternalFile(hand_landmarks_detector_file, if (!hand_landmarks_detector_graph_options->base_options()
hand_landmarks_detector_graph_options->mutable_base_options() .has_model_asset()) {
->mutable_model_asset(), ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
is_copy); resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
SetExternalFile(
hand_landmarks_detector_file,
hand_landmarks_detector_graph_options->mutable_base_options()
->mutable_model_asset(),
is_copy);
}
hand_landmarks_detector_graph_options->mutable_base_options() hand_landmarks_detector_graph_options->mutable_base_options()
->mutable_acceleration() ->mutable_acceleration()
->CopyFrom(options->base_options().acceleration()); ->CopyFrom(options->base_options().acceleration());
@ -119,7 +127,6 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
->set_use_stream_mode(options->base_options().use_stream_mode()); ->set_use_stream_mode(options->base_options().use_stream_mode());
return absl::OkStatus(); return absl::OkStatus();
} }
} // namespace } // namespace
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand // A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
@ -219,12 +226,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
!sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService) !sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
.IsAvailable())); .IsAvailable()));
} }
Stream<Image> image_in = graph.In(kImageTag).Cast<Image>();
std::optional<Stream<NormalizedRect>> norm_rect_in;
if (HasInput(sc->OriginalNode(), kNormRectTag)) {
norm_rect_in = graph.In(kNormRectTag).Cast<NormalizedRect>();
}
ASSIGN_OR_RETURN( ASSIGN_OR_RETURN(
auto hand_landmarker_outputs, auto hand_landmarker_outputs,
BuildHandLandmarkerGraph( BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
sc->Options<HandLandmarkerGraphOptions>(), image_in, norm_rect_in, graph));
graph[Input<Image>(kImageTag)],
graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));
hand_landmarker_outputs.landmark_lists >> hand_landmarker_outputs.landmark_lists >>
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)]; graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
hand_landmarker_outputs.world_landmark_lists >> hand_landmarker_outputs.world_landmark_lists >>
@ -262,8 +272,8 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
// image_in: (mediapipe::Image) stream to run hand landmark detection on. // image_in: (mediapipe::Image) stream to run hand landmark detection on.
// graph: the mediapipe graph instance to be updated. // graph: the mediapipe graph instance to be updated.
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph( absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in, const HandLandmarkerGraphOptions& tasks_options, Stream<Image> image_in,
Source<NormalizedRect> norm_rect_in, Graph& graph) { std::optional<Stream<NormalizedRect>> norm_rect_in, Graph& graph) {
const int max_num_hands = const int max_num_hands =
tasks_options.hand_detector_graph_options().num_hands(); tasks_options.hand_detector_graph_options().num_hands();
@ -293,10 +303,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
// track the hands from the last frame. // track the hands from the last frame.
auto image_for_hand_detector = auto image_for_hand_detector =
DisallowIf(image_in, has_enough_hands, graph); DisallowIf(image_in, has_enough_hands, graph);
auto norm_rect_in_for_hand_detector = std::optional<Stream<NormalizedRect>> norm_rect_in_for_hand_detector;
DisallowIf(norm_rect_in, has_enough_hands, graph); if (norm_rect_in) {
norm_rect_in_for_hand_detector =
DisallowIf(norm_rect_in.value(), has_enough_hands, graph);
}
image_for_hand_detector >> hand_detector.In("IMAGE"); image_for_hand_detector >> hand_detector.In("IMAGE");
norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT"); if (norm_rect_in_for_hand_detector) {
norm_rect_in_for_hand_detector.value() >> hand_detector.In("NORM_RECT");
}
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS"); auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
auto& hand_association = graph.AddNode("HandAssociationCalculator"); auto& hand_association = graph.AddNode("HandAssociationCalculator");
hand_association.GetOptions<HandAssociationCalculatorOptions>() hand_association.GetOptions<HandAssociationCalculatorOptions>()
@ -313,7 +328,9 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
// series, and we don't want to enable the tracking and hand associations // series, and we don't want to enable the tracking and hand associations
// between input images. Always use the hand detector graph. // between input images. Always use the hand detector graph.
image_in >> hand_detector.In("IMAGE"); image_in >> hand_detector.In("IMAGE");
norm_rect_in >> hand_detector.In("NORM_RECT"); if (norm_rect_in) {
norm_rect_in.value() >> hand_detector.In("NORM_RECT");
}
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS"); auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
hand_rects_from_hand_detector >> clip_hand_rects.In(""); hand_rects_from_hand_detector >> clip_hand_rects.In("");
} }

View File

@ -34,16 +34,14 @@ objc_library(
data = [ data = [
"//mediapipe/tasks/testdata/vision:test_images", "//mediapipe/tasks/testdata/vision:test_images",
], ],
sdk_frameworks = [
"CoreMedia",
"CoreVideo",
"CoreGraphics",
"UIKit",
"Accelerate",
],
deps = [ deps = [
"//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common:MPPCommon",
"//mediapipe/tasks/ios/vision/core:MPPImage", "//mediapipe/tasks/ios/vision/core:MPPImage",
"//third_party/apple_frameworks:Accelerate",
"//third_party/apple_frameworks:CoreGraphics",
"//third_party/apple_frameworks:CoreMedia",
"//third_party/apple_frameworks:CoreVideo",
"//third_party/apple_frameworks:UIKit",
], ],
) )

View File

@ -11,11 +11,6 @@ objc_library(
"-std=c++17", "-std=c++17",
], ],
module_name = "MPPImage", module_name = "MPPImage",
sdk_frameworks = [
"CoreMedia",
"CoreVideo",
"UIKit",
],
deps = [ deps = [
"//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common:MPPCommon",
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",

View File

@ -0,0 +1,27 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#include "mediapipe/framework/packet.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
/**
 * This class helps create various kinds of packets for MediaPipe Vision Tasks.
*/
@interface MPPVisionPacketCreator : NSObject
+ (mediapipe::Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error;
@end

View File

@ -0,0 +1,43 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h"
#include "mediapipe/framework/formats/image.h"
namespace {
using ::mediapipe::Image;
using ::mediapipe::ImageFrame;
using ::mediapipe::MakePacket;
using ::mediapipe::Packet;
} // namespace
struct freeDeleter {
void operator()(void *ptr) { free(ptr); }
};
@implementation MPPVisionPacketCreator
+ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error {
std::unique_ptr<ImageFrame> imageFrame = [image imageFrameWithError:error];
if (!imageFrame) {
return Packet();
}
return MakePacket<Image>(std::move(imageFrame));
}
@end
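The new `MPPVisionPacketCreator` wraps the converted `ImageFrame` in a `mediapipe::Image` packet. A plain C++ sketch of what the Objective-C++ method above does follows; the function name and the blank 64x64 stand-in frame are illustrative only, since the real frame comes from `[image imageFrameWithError:]`:

```c++
#include <memory>
#include <utility>

#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/packet.h"

// Illustrative helper name; the packet creator performs the same two steps.
mediapipe::Packet MakeImagePacket() {
  // Stand-in for the ImageFrame that MPPImage+Utils produces from a UIImage,
  // CVPixelBuffer, or CMSampleBuffer.
  auto frame = std::make_unique<mediapipe::ImageFrame>(
      mediapipe::ImageFormat::SRGB, /*width=*/64, /*height=*/64);

  // mediapipe::Image takes ownership of the frame (the unique_ptr converts to
  // the shared_ptr the Image constructor expects) and MakePacket wraps it.
  return mediapipe::MakePacket<mediapipe::Image>(std::move(frame));
}
```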

View File

@ -4,23 +4,22 @@ licenses(["notice"])
objc_library( objc_library(
name = "MPPImageUtils", name = "MPPImageUtils",
srcs = ["sources/MPPImage+Utils.m"], srcs = ["sources/MPPImage+Utils.mm"],
hdrs = ["sources/MPPImage+Utils.h"], hdrs = ["sources/MPPImage+Utils.h"],
copts = [ copts = [
"-ObjC++", "-ObjC++",
"-std=c++17", "-std=c++17",
], ],
module_name = "MPPImageUtils", module_name = "MPPImageUtils",
sdk_frameworks = [
"Accelerate",
"CoreGraphics",
"CoreImage",
"CoreVideo",
"UIKit",
],
deps = [ deps = [
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/tasks/ios/common:MPPCommon",
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
"//mediapipe/tasks/ios/vision/core:MPPImage", "//mediapipe/tasks/ios/vision/core:MPPImage",
"//third_party/apple_frameworks:UIKit", "//third_party/apple_frameworks:Accelerate",
"//third_party/apple_frameworks:CoreGraphics",
"//third_party/apple_frameworks:CoreImage",
"//third_party/apple_frameworks:CoreVideo",
], ],
) )

View File

@ -14,30 +14,27 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
#include "mediapipe/framework/formats/image_frame.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" #import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
NS_ASSUME_NONNULL_BEGIN NS_ASSUME_NONNULL_BEGIN
/** /**
* Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library. * Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`.
*/ */
@interface MPPImage (Utils) @interface MPPImage (Utils)
/** Bitmap size of the image. */
@property(nonatomic, readonly) CGSize bitmapSize;
/** /**
* Returns the underlying uint8 pixel buffer of an `MPPImage`. * Converts the `MPPImage` into a `mediapipe::ImageFrame`.
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is
* data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated * converted to an RGB format. In case of grayscale images, the mono channel is duplicated in the R,
* in the R, G, B channels. * G, B channels.
* *
* @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
* error will be saved. * error will be saved.
* *
* @return The underlying pixel buffer of the `MPPImage` or nil in case of errors. * @return An std::unique_ptr<mediapipe::ImageFrame> or `nullptr` in case of errors.
*/ */
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error; - (std::unique_ptr<mediapipe::ImageFrame>)imageFrameWithError:(NSError **)error;
@end @end

View File

@ -22,6 +22,12 @@
#import <CoreImage/CoreImage.h> #import <CoreImage/CoreImage.h>
#import <CoreVideo/CoreVideo.h> #import <CoreVideo/CoreVideo.h>
#include "mediapipe/framework/formats/image_format.pb.h"
namespace {
using ::mediapipe::ImageFrame;
}
@interface MPPPixelDataUtils : NSObject @interface MPPPixelDataUtils : NSObject
+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData + (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
@ -35,21 +41,20 @@
@interface MPPCVPixelBufferUtils : NSObject @interface MPPCVPixelBufferUtils : NSObject
+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; + (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error;
@end @end
@interface MPPCGImageUtils : NSObject @interface MPPCGImageUtils : NSObject
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error; + (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
@end @end
@interface UIImage (RawPixelDataUtils) @interface UIImage (ImageFrameUtils)
@property(nonatomic, readonly) CGSize bitmapSize; - (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error;
- (uint8_t *)pixelDataWithError:(NSError **)error;
@end @end
@ -120,9 +125,14 @@
@implementation MPPCVPixelBufferUtils @implementation MPPCVPixelBufferUtils
+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error { + (std::unique_ptr<ImageFrame>)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error {
CVPixelBufferLockBaseAddress(pixelBuffer, 0); CVPixelBufferLockBaseAddress(pixelBuffer, 0);
size_t width = CVPixelBufferGetWidth(pixelBuffer);
size_t height = CVPixelBufferGetHeight(pixelBuffer);
size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer);
uint8_t *rgbPixelData = [MPPPixelDataUtils uint8_t *rgbPixelData = [MPPPixelDataUtils
rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer) rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
withWidth:CVPixelBufferGetWidth(pixelBuffer) withWidth:CVPixelBufferGetWidth(pixelBuffer)
@ -133,19 +143,24 @@
CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
return rgbPixelData; if (!rgbPixelData) {
return nullptr;
}
std::unique_ptr<ImageFrame> imageFrame = absl::make_unique<ImageFrame>(
::mediapipe::ImageFormat::SRGB, width, height, stride, static_cast<uint8 *>(rgbPixelData),
/*deleter=*/free);
return imageFrame;
} }
+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer + (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error { error:(NSError **)error {
uint8_t *pixelData = NULL;
OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer); OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
switch (pixelBufferFormat) { switch (pixelBufferFormat) {
case kCVPixelFormatType_32BGRA: { case kCVPixelFormatType_32BGRA: {
pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error]; return [MPPCVPixelBufferUtils rgbImageFrameFromCVPixelBuffer:pixelBuffer error:error];
break;
} }
default: { default: {
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
@ -155,20 +170,20 @@
} }
} }
return pixelData; return nullptr;
} }
@end @end
@implementation MPPCGImageUtils @implementation MPPCGImageUtils
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error { + (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
size_t width = CGImageGetWidth(cgImage); size_t width = CGImageGetWidth(cgImage);
size_t height = CGImageGetHeight(cgImage); size_t height = CGImageGetHeight(cgImage);
NSInteger bitsPerComponent = 8; NSInteger bitsPerComponent = 8;
NSInteger channelCount = 4; NSInteger channelCount = 4;
UInt8 *pixel_data_to_return = NULL; UInt8 *pixelDataToReturn = NULL;
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
size_t bytesPerRow = channelCount * width; size_t bytesPerRow = channelCount * width;
@ -191,12 +206,12 @@
if (srcData) { if (srcData) {
// We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input // We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input
// a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage. // a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
pixel_data_to_return = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
withWidth:width withWidth:width
height:height height:height
stride:bytesPerRow stride:bytesPerRow
pixelBufferFormat:kCVPixelFormatType_32RGBA pixelBufferFormat:kCVPixelFormatType_32RGBA
error:error]; error:error];
} }
CGContextRelease(context); CGContextRelease(context);
@ -204,38 +219,38 @@
CGColorSpaceRelease(colorSpace); CGColorSpaceRelease(colorSpace);
return pixel_data_to_return; std::unique_ptr<ImageFrame> imageFrame =
absl::make_unique<ImageFrame>(mediapipe::ImageFormat::SRGB, (int)width, (int)height,
(int)bytesPerRow, static_cast<uint8 *>(pixelDataToReturn),
/*deleter=*/free);
return imageFrame;
} }
@end @end
@implementation UIImage (RawPixelDataUtils) @implementation UIImage (ImageFrameUtils)
- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error {
uint8_t *pixelData = NULL;
- (std::unique_ptr<ImageFrame>)imageFrameFromCIImageWithError:(NSError **)error {
if (self.CIImage.pixelBuffer) { if (self.CIImage.pixelBuffer) {
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error];
error:error];
} else if (self.CIImage.CGImage) { } else if (self.CIImage.CGImage) {
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error]; return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error];
} else { } else {
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError withCode:MPPTasksErrorCodeInvalidArgumentError
description:@"CIImage should have CGImage or CVPixelBuffer info."]; description:@"CIImage should have CGImage or CVPixelBuffer info."];
} }
return pixelData; return nullptr;
} }
- (uint8_t *)pixelDataWithError:(NSError **)error { - (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
uint8_t *pixelData = nil;
if (self.CGImage) { if (self.CGImage) {
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error]; return [MPPCGImageUtils imageFrameFromCGImage:self.CGImage error:error];
} else if (self.CIImage) { } else if (self.CIImage) {
pixelData = [self pixelDataFromCIImageWithError:error]; return [self imageFrameFromCIImageWithError:error];
} else { } else {
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError withCode:MPPTasksErrorCodeInvalidArgumentError
@ -243,46 +258,24 @@
" CIImage or CGImage."]; " CIImage or CGImage."];
} }
return pixelData; return nullptr;
} }
- (CGSize)bitmapSize {
CGFloat width = 0;
CGFloat height = 0;
if (self.CGImage) {
width = CGImageGetWidth(self.CGImage);
height = CGImageGetHeight(self.CGImage);
} else if (self.CIImage.pixelBuffer) {
width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer);
height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer);
} else if (self.CIImage.CGImage) {
width = CGImageGetWidth(self.CIImage.CGImage);
height = CGImageGetHeight(self.CIImage.CGImage);
}
return CGSizeMake(width, height);
}
@end @end
@implementation MPPImage (Utils) @implementation MPPImage (Utils)
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error { - (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
uint8_t *pixelData = NULL;
switch (self.imageSourceType) { switch (self.imageSourceType) {
case MPPImageSourceTypeSampleBuffer: { case MPPImageSourceTypeSampleBuffer: {
CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer); CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:sampleImagePixelBuffer return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error];
error:error];
break;
} }
case MPPImageSourceTypePixelBuffer: { case MPPImageSourceTypePixelBuffer: {
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error]; return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error];
break;
} }
case MPPImageSourceTypeImage: { case MPPImageSourceTypeImage: {
pixelData = [self.image pixelDataWithError:error]; return [self.image imageFrameWithError:error];
break;
} }
default: default:
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
@ -290,35 +283,7 @@
description:@"Invalid source type for MPPImage."]; description:@"Invalid source type for MPPImage."];
} }
return pixelData; return nullptr;
}
- (CGSize)bitmapSize {
CGFloat width = 0;
CGFloat height = 0;
switch (self.imageSourceType) {
case MPPImageSourceTypeSampleBuffer: {
CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
width = CVPixelBufferGetWidth(pixelBuffer);
height = CVPixelBufferGetHeight(pixelBuffer);
break;
}
case MPPImageSourceTypePixelBuffer: {
width = CVPixelBufferGetWidth(self.pixelBuffer);
height = CVPixelBufferGetHeight(self.pixelBuffer);
break;
}
case MPPImageSourceTypeImage: {
width = self.image.bitmapSize.width;
height = self.image.bitmapSize.height;
break;
}
default:
break;
}
return CGSizeMake(width, height);
} }
@end @end
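A recurring detail in the utilities above is that the converted RGB buffer is handed to `ImageFrame` together with `free` as the deleter, so the frame takes ownership of the `malloc`-allocated pixels instead of copying them. Below is a self-contained C++ sketch of that ownership transfer; the function name, dimensions, and the locally allocated buffer are illustrative stand-ins for what `MPPPixelDataUtils` produces:

```c++
#include <cstdint>
#include <cstdlib>
#include <memory>

#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"

// Illustrative helper; mirrors how MPPImage+Utils.mm builds its ImageFrames.
std::unique_ptr<mediapipe::ImageFrame> AdoptRgbBuffer() {
  const int width = 64;
  const int height = 64;
  const int channels = 3;                   // SRGB: one byte per channel.
  const int width_step = width * channels;  // Tightly packed rows.

  // Stand-in for the buffer produced by the vImage conversion above, which is
  // allocated with malloc.
  auto* pixel_data = static_cast<uint8_t*>(malloc(width_step * height));

  // ImageFrame adopts the buffer and calls `free` on it when destroyed, which
  // is how the utilities avoid copying the converted pixels.
  return std::make_unique<mediapipe::ImageFrame>(
      mediapipe::ImageFormat::SRGB, width, height, width_step, pixel_data,
      /*deleter=*/free);
}
```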

View File

@ -0,0 +1,38 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
package(default_visibility = ["//mediapipe/tasks:internal"])
licenses(["notice"])
objc_library(
name = "MPPImageClassifierResult",
srcs = ["sources/MPPImageClassifierResult.m"],
hdrs = ["sources/MPPImageClassifierResult.h"],
deps = [
"//mediapipe/tasks/ios/components/containers:MPPClassificationResult",
"//mediapipe/tasks/ios/core:MPPTaskResult",
],
)
objc_library(
name = "MPPImageClassifierOptions",
srcs = ["sources/MPPImageClassifierOptions.m"],
hdrs = ["sources/MPPImageClassifierOptions.h"],
deps = [
":MPPImageClassifierResult",
"//mediapipe/tasks/ios/core:MPPTaskOptions",
"//mediapipe/tasks/ios/vision/core:MPPRunningMode",
],
)

View File

@ -0,0 +1,71 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h"
NS_ASSUME_NONNULL_BEGIN
/**
* Options for setting up a `MPPImageClassifier`.
*/
NS_SWIFT_NAME(ImageClassifierOptions)
@interface MPPImageClassifierOptions : MPPTaskOptions <NSCopying>
@property(nonatomic) MPPRunningMode runningMode;
/**
* The user-defined result callback for processing live stream data. The result callback should only
* be specified when the running mode is set to the live stream mode.
*/
@property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error);
/**
* The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults
* to English.
*/
@property(nonatomic, copy) NSString *displayNamesLocale;
/**
* The maximum number of top-scored classification results to return. If < 0, all available results
* will be returned. If 0, an invalid argument error is returned.
*/
@property(nonatomic) NSInteger maxResults;
/**
* Score threshold to override the one provided in the model metadata (if any). Results below this
* value are rejected.
*/
@property(nonatomic) float scoreThreshold;
/**
* The allowlist of category names. If non-empty, detection results whose category name is not in
* this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
* exclusive with categoryDenylist.
*/
@property(nonatomic, copy) NSArray<NSString *> *categoryAllowlist;
/**
* The denylist of category names. If non-empty, detection results whose category name is in this
* set will be filtered out. Duplicate or unknown category names are ignored. Mutually exclusive
* with categoryAllowlist.
*/
@property(nonatomic, copy) NSArray<NSString *> *categoryDenylist;
@end
NS_ASSUME_NONNULL_END
Some files were not shown because too many files have changed in this diff.