Merge branch 'google:master' into master
This commit is contained in:
commit
6e7018b826
11
README.md
11
README.md
|
@ -19,6 +19,17 @@ ML solutions for live and streaming media.
|
|||
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
|
||||
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
|
||||
|
||||
----
|
||||
|
||||
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||
as the primary developer documentation
|
||||
site for MediaPipe starting April 3, 2023.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## ML solutions in MediaPipe
|
||||
|
||||
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
|
||||
|
|
13
docs/_layouts/forward.html
Normal file
13
docs/_layouts/forward.html
Normal file
|
@ -0,0 +1,13 @@
|
|||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<meta http-equiv="refresh" content="0;url={{ page.target }}"/>
|
||||
<link rel="canonical" href="{{ page.target }}"/>
|
||||
<title>Redirecting</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>This page now lives on https://developers.google.com/mediapipe/. If you aren't automatically
|
||||
redirected, follow this
|
||||
<a href="{{ page.target }}">link</a>.</p>
|
||||
</body>
|
||||
</html>
|
|
@ -593,3 +593,105 @@ CalculatorGraphConfig BuildGraph() {
|
|||
return graph.GetConfig();
|
||||
}
|
||||
```
|
||||
|
||||
### Separate nodes for better readability
|
||||
|
||||
```c++ {.bad}
|
||||
CalculatorGraphConfig BuildGraph() {
|
||||
Graph graph;
|
||||
|
||||
// Inputs.
|
||||
Stream<A> a = graph.In(0).Cast<A>();
|
||||
auto& node1 = graph.AddNode("Calculator1");
|
||||
a.ConnectTo(node1.In("INPUT"));
|
||||
Stream<B> b = node1.Out("OUTPUT").Cast<B>();
|
||||
auto& node2 = graph.AddNode("Calculator2");
|
||||
b.ConnectTo(node2.In("INPUT"));
|
||||
Stream<C> c = node2.Out("OUTPUT").Cast<C>();
|
||||
auto& node3 = graph.AddNode("Calculator3");
|
||||
b.ConnectTo(node3.In("INPUT_B"));
|
||||
c.ConnectTo(node3.In("INPUT_C"));
|
||||
Stream<D> d = node3.Out("OUTPUT").Cast<D>();
|
||||
auto& node4 = graph.AddNode("Calculator4");
|
||||
b.ConnectTo(node4.In("INPUT_B"));
|
||||
c.ConnectTo(node4.In("INPUT_C"));
|
||||
d.ConnectTo(node4.In("INPUT_D"));
|
||||
Stream<E> e = node4.Out("OUTPUT").Cast<E>();
|
||||
// Outputs.
|
||||
b.SetName("b").ConnectTo(graph.Out(0));
|
||||
c.SetName("c").ConnectTo(graph.Out(1));
|
||||
d.SetName("d").ConnectTo(graph.Out(2));
|
||||
e.SetName("e").ConnectTo(graph.Out(3));
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
```
|
||||
|
||||
In the above code, it can be hard to see where each node begins and
|
||||
ends. To improve this and help your code readers, you can simply have blank
|
||||
lines before and after each node:
|
||||
|
||||
```c++ {.good}
|
||||
CalculatorGraphConfig BuildGraph() {
|
||||
Graph graph;
|
||||
|
||||
// Inputs.
|
||||
Stream<A> a = graph.In(0).Cast<A>();
|
||||
|
||||
auto& node1 = graph.AddNode("Calculator1");
|
||||
a.ConnectTo(node1.In("INPUT"));
|
||||
Stream<B> b = node1.Out("OUTPUT").Cast<B>();
|
||||
|
||||
auto& node2 = graph.AddNode("Calculator2");
|
||||
b.ConnectTo(node2.In("INPUT"));
|
||||
Stream<C> c = node2.Out("OUTPUT").Cast<C>();
|
||||
|
||||
auto& node3 = graph.AddNode("Calculator3");
|
||||
b.ConnectTo(node3.In("INPUT_B"));
|
||||
c.ConnectTo(node3.In("INPUT_C"));
|
||||
Stream<D> d = node3.Out("OUTPUT").Cast<D>();
|
||||
|
||||
auto& node4 = graph.AddNode("Calculator4");
|
||||
b.ConnectTo(node4.In("INPUT_B"));
|
||||
c.ConnectTo(node4.In("INPUT_C"));
|
||||
d.ConnectTo(node4.In("INPUT_D"));
|
||||
Stream<E> e = node4.Out("OUTPUT").Cast<E>();
|
||||
|
||||
// Outputs.
|
||||
b.SetName("b").ConnectTo(graph.Out(0));
|
||||
c.SetName("c").ConnectTo(graph.Out(1));
|
||||
d.SetName("d").ConnectTo(graph.Out(2));
|
||||
e.SetName("e").ConnectTo(graph.Out(3));
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
```
|
||||
|
||||
Also, the above representation matches the `CalculatorGraphConfig` proto
|
||||
representation better.
|
||||
|
||||
If you extract nodes into utility functions, they are scoped within functions
|
||||
already and it's clear where they begin and end, so it's completely fine to
|
||||
have:
|
||||
|
||||
```c++ {.good}
|
||||
CalculatorGraphConfig BuildGraph() {
|
||||
Graph graph;
|
||||
|
||||
// Inputs.
|
||||
Stream<A> a = graph.In(0).Cast<A>();
|
||||
|
||||
Stream<B> b = RunCalculator1(a, graph);
|
||||
Stream<C> c = RunCalculator2(b, graph);
|
||||
Stream<D> d = RunCalculator3(b, c, graph);
|
||||
Stream<E> e = RunCalculator4(b, c, d, graph);
|
||||
|
||||
// Outputs.
|
||||
b.SetName("b").ConnectTo(graph.Out(0));
|
||||
c.SetName("c").ConnectTo(graph.Out(1));
|
||||
d.SetName("d").ConnectTo(graph.Out(2));
|
||||
e.SetName("e").ConnectTo(graph.Out(3));
|
||||
|
||||
return graph.GetConfig();
|
||||
}
|
||||
```
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/calculators
|
||||
title: Calculators
|
||||
parent: Framework Concepts
|
||||
nav_order: 1
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/overview
|
||||
title: Framework Concepts
|
||||
nav_order: 5
|
||||
has_children: true
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/gpu
|
||||
title: GPU
|
||||
parent: Framework Concepts
|
||||
nav_order: 5
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/graphs
|
||||
title: Graphs
|
||||
parent: Framework Concepts
|
||||
nav_order: 2
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/packets
|
||||
title: Packets
|
||||
parent: Framework Concepts
|
||||
nav_order: 3
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/realtime_streams
|
||||
title: Real-time Streams
|
||||
parent: Framework Concepts
|
||||
nav_order: 6
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/framework_concepts/synchronization
|
||||
title: Synchronization
|
||||
parent: Framework Concepts
|
||||
nav_order: 4
|
||||
|
|
|
@ -13,6 +13,17 @@ nav_order: 2
|
|||
{:toc}
|
||||
---
|
||||
|
||||
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||
as the primary developer documentation
|
||||
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||
the new site, but will remain available in the source code repository on an
|
||||
as-is basis.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
MediaPipe Android Solution APIs (currently in alpha) are available in:
|
||||
|
||||
* [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)
|
||||
|
|
|
@ -12,6 +12,17 @@ nav_exclude: true
|
|||
{:toc}
|
||||
---
|
||||
|
||||
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||
as the primary developer documentation
|
||||
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||
the new site, but will remain available in the source code repository on an
|
||||
as-is basis.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
### Android
|
||||
|
||||
Please see these [instructions](./android.md).
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/getting_started/faq
|
||||
title: FAQ
|
||||
parent: Getting Started
|
||||
nav_order: 9
|
||||
|
@ -59,7 +60,7 @@ The second approach allows up to [`max_in_flight`] invocations of the
|
|||
packets from [`CalculatorBase::Process`] are automatically ordered by timestamp
|
||||
before they are passed along to downstream calculators.
|
||||
|
||||
With either aproach, you must be aware that the calculator running in parallel
|
||||
With either approach, you must be aware that the calculator running in parallel
|
||||
cannot maintain internal state in the same way as a normal sequential
|
||||
calculator.
|
||||
|
||||
|
|
|
@ -11,3 +11,14 @@ has_children: true
|
|||
1. TOC
|
||||
{:toc}
|
||||
---
|
||||
|
||||
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||
as the primary developer documentation
|
||||
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||
the new site, but will remain available in the source code repository on an
|
||||
as-is basis.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/getting_started/gpu_support
|
||||
title: GPU Support
|
||||
parent: Getting Started
|
||||
nav_order: 7
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/getting_started/help
|
||||
title: Getting Help
|
||||
parent: Getting Started
|
||||
nav_order: 8
|
||||
|
@ -37,8 +38,8 @@ If you open a GitHub issue, here is our policy:
|
|||
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
|
||||
- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
|
||||
- **Bazel version**:
|
||||
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev enviroment)**:
|
||||
- **Xcode & Tulsi version (if issue is related to building in mobile dev enviroment)**:
|
||||
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev environment)**:
|
||||
- **Xcode & Tulsi version (if issue is related to building in mobile dev environment)**:
|
||||
- **Exact steps to reproduce**:
|
||||
|
||||
### Describe the problem
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/getting_started/install
|
||||
title: Installation
|
||||
parent: Getting Started
|
||||
nav_order: 6
|
||||
|
|
|
@ -12,6 +12,17 @@ nav_order: 4
|
|||
{:toc}
|
||||
---
|
||||
|
||||
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||
as the primary developer documentation
|
||||
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||
the new site, but will remain available in the source code repository on an
|
||||
as-is basis.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Ready-to-use JavaScript Solutions
|
||||
|
||||
MediaPipe currently offers the following solutions:
|
||||
|
@ -33,7 +44,7 @@ snippets.
|
|||
|
||||
| Browser | Platform | Notes |
|
||||
| ------- | ----------------------- | -------------------------------------- |
|
||||
| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuschia |
|
||||
| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia |
|
||||
| | | unsupported. |
|
||||
| Chrome | iOS | Camera unavailable in Chrome on iOS. |
|
||||
| Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / |
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/getting_started/troubleshooting
|
||||
title: Troubleshooting
|
||||
parent: Getting Started
|
||||
nav_order: 10
|
||||
|
@ -65,7 +66,7 @@ WARNING: Download from https://storage.googleapis.com/mirror.tensorflow.org/gith
|
|||
```
|
||||
|
||||
usually indicates that Bazel fails to download necessary dependency repositories
|
||||
that MediaPipe needs. MedaiPipe has several dependency repositories that are
|
||||
that MediaPipe needs. MediaPipe has several dependency repositories that are
|
||||
hosted by Google sites. In some regions, you may need to set up a network proxy
|
||||
or use a VPN to access those resources. You may also need to append
|
||||
`--host_jvm_args "-DsocksProxyHost=<ip address> -DsocksProxyPort=<port number>"`
|
||||
|
|
|
@ -19,6 +19,17 @@ ML solutions for live and streaming media.
|
|||
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
|
||||
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
|
||||
|
||||
----
|
||||
|
||||
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||
as the primary developer documentation
|
||||
site for MediaPipe starting April 3, 2023.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## ML solutions in MediaPipe
|
||||
|
||||
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 14
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 10
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
MediaPipe Box Tracking has been powering real-time tracking in
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 1
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
MediaPipe Face Detection is an ultrafast face detection solution that comes with
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 2
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
|
||||
|
@ -133,7 +143,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
|
|||
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
|
||||
detection in the screen coordinate space: the X- and Y- coordinates are
|
||||
normalized screen coordinates, while the Z coordinate is relative and is scaled
|
||||
as the X coodinate under the
|
||||
as the X coordinate under the
|
||||
[weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection).
|
||||
This format is well-suited for some applications; however, it does not directly
|
||||
enable the full spectrum of augmented reality (AR) features like aligning a
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 8
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
![hair_segmentation_android_gpu_gif](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu.gif)
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 4
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
The ability to perceive the shape and motion of hands can be a vital component
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 6
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
Live perception of simultaneous [human pose](./pose.md),
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 11
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
Augmented Reality (AR) technology creates fun, engaging, and immersive user
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 3
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
A wide range of real-world applications, including computational photography
|
||||
|
@ -38,7 +48,7 @@ camera, in real-time, without the need for specialized hardware. Through use of
|
|||
iris landmarks, the solution is also able to determine the metric distance
|
||||
between the subject and the camera with relative error less than 10%. Note that
|
||||
iris tracking does not infer the location at which people are looking, nor does
|
||||
it provide any form of identity recognition. With the cross-platfrom capability
|
||||
it provide any form of identity recognition. With the cross-platform capability
|
||||
of the MediaPipe framework, MediaPipe Iris can run on most modern
|
||||
[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
|
||||
[web](#web).
|
||||
|
@ -99,7 +109,7 @@ You can also find more details in this
|
|||
### Iris Landmark Model
|
||||
|
||||
The iris model takes an image patch of the eye region and estimates both the eye
|
||||
landmarks (along the eyelid) and iris landmarks (along ths iris contour). You
|
||||
landmarks (along the eyelid) and iris landmarks (along the iris contour). You
|
||||
can find more details in this [paper](https://arxiv.org/abs/2006.11341).
|
||||
|
||||
![iris_tracking_eye_and_iris_landmarks.png](https://mediapipe.dev/images/mobile/iris_tracking_eye_and_iris_landmarks.png) |
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 13
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
MediaPipe KNIFT is a template-based feature matching solution using KNIFT
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 15
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
MediaPipe is a useful and general framework for media processing that can
|
||||
|
@ -85,7 +95,7 @@ process new data sets, in the documentation of
|
|||
|
||||
MediaSequence uses SequenceExamples as the format of both inputs and
|
||||
outputs. Annotations are encoded as inputs in a SequenceExample of metadata
|
||||
that defines the labels and the path to the cooresponding video file. This
|
||||
that defines the labels and the path to the corresponding video file. This
|
||||
metadata is passed as input to the C++ `media_sequence_demo` binary, and the
|
||||
output is a SequenceExample filled with images and annotations ready for
|
||||
model training.
|
||||
|
|
|
@ -12,6 +12,20 @@ nav_order: 30
|
|||
{:toc}
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for
|
||||
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
as of March 1, 2023. All other
|
||||
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
to a new MediaPipe Solution. The code repository and prebuilt binaries for all
|
||||
MediaPipe Legacy Solutions will continue to be provided on an as-is basis.
|
||||
We encourage you to check out the new MediaPipe Solutions at:
|
||||
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)
|
||||
|
||||
* Short-range model (best for faces within 2 meters from the camera):
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 9
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
![object_detection_android_gpu.gif](https://mediapipe.dev/images/mobile/object_detection_android_gpu.gif)
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 12
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
MediaPipe Objectron is a mobile real-time 3D object detection solution for
|
||||
|
@ -170,7 +180,7 @@ and a
|
|||
The detection subgraph performs ML inference only once every few frames to
|
||||
reduce computation load, and decodes the output tensor to a FrameAnnotation that
|
||||
contains nine keypoints: the 3D bounding box's center and its eight vertices.
|
||||
The tracking subgraph runs every frame, using the box traker in
|
||||
The tracking subgraph runs every frame, using the box tracker in
|
||||
[MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly
|
||||
enclosing the projection of the 3D bounding box, and lifts the tracked 2D
|
||||
keypoints to 3D with
|
||||
|
@ -613,7 +623,7 @@ z_ndc = 1 / Z
|
|||
|
||||
### Pixel Space
|
||||
|
||||
In this API we set upper-left coner of an image as the origin of pixel
|
||||
In this API we set the upper-left corner of an image as the origin of pixel
|
||||
coordinate. One can convert from NDC to pixel space as follows:
|
||||
|
||||
```
|
||||
|
|
|
@ -20,6 +20,16 @@ nav_order: 5
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
Human pose estimation from video plays a critical role in various applications
|
||||
|
|
|
@ -19,6 +19,16 @@ nav_order: 1
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
One of the applications
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 7
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||
Solution. For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
## Overview
|
||||
|
||||
*Fig 1. Example of MediaPipe Selfie Segmentation.* |
|
||||
|
|
|
@ -13,7 +13,21 @@ has_toc: false
|
|||
{:toc}
|
||||
---
|
||||
|
||||
Note: These solutions are no longer actively maintained. Consider using or migrating to the new [MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide).
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions. We have
|
||||
ended support for
|
||||
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
as of March 1, 2023. All other
|
||||
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
to a new MediaPipe Solution. The
|
||||
[code repository](https://github.com/google/mediapipe/tree/master/mediapipe)
|
||||
and prebuilt binaries for all MediaPipe Legacy Solutions will continue to
|
||||
be provided on an as-is basis. We encourage you to check out the new MediaPipe
|
||||
Solutions at:
|
||||
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*
|
||||
|
||||
*This notice and web page will be removed on June 1, 2023.*
|
||||
|
||||
----
|
||||
|
||||
MediaPipe offers open source cross-platform, customizable ML solutions for live
|
||||
and streaming media.
|
||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 16
|
|||
</details>
|
||||
---
|
||||
|
||||
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||
For more information, see the new
|
||||
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||
site.*
|
||||
|
||||
*This notice and web page will be removed on April 3, 2023.*
|
||||
|
||||
----
|
||||
|
||||
MediaPipe is a useful and general framework for media processing that can assist
|
||||
with research, development, and deployment of ML models. This example focuses on
|
||||
model development by demonstrating how to prepare training data and do model
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
layout: default
|
||||
layout: forward
|
||||
target: https://developers.google.com/mediapipe/framework/tools/visualizer
|
||||
title: Visualizer
|
||||
parent: Tools
|
||||
nav_order: 1
|
||||
|
|
|
@ -48,7 +48,6 @@ class MergeToVectorCalculator : public Node {
|
|||
}
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) {
|
||||
const int input_num = kIn(cc).Count();
|
||||
std::vector<T> output_vector;
|
||||
for (auto it = kIn(cc).begin(); it != kIn(cc).end(); it++) {
|
||||
const auto& elem = *it;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
# limitations under the License.
|
||||
#
|
||||
|
||||
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type")
|
||||
|
||||
|
@ -23,6 +24,14 @@ package(
|
|||
|
||||
licenses(["notice"])
|
||||
|
||||
selects.config_setting_group(
|
||||
name = "ios_or_disable_gpu",
|
||||
match_any = [
|
||||
"//mediapipe/gpu:disable_gpu",
|
||||
"//mediapipe:ios",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "detection_proto",
|
||||
srcs = ["detection.proto"],
|
||||
|
@ -336,9 +345,7 @@ cc_library(
|
|||
"//conditions:default": [
|
||||
"//mediapipe/gpu:gl_texture_buffer",
|
||||
],
|
||||
"//mediapipe:ios": [
|
||||
],
|
||||
"//mediapipe/gpu:disable_gpu": [],
|
||||
"ios_or_disable_gpu": [],
|
||||
}) + select({
|
||||
"//conditions:default": [],
|
||||
"//mediapipe:apple": [
|
||||
|
|
|
@ -18,15 +18,16 @@
|
|||
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tool {
|
||||
|
||||
absl::Status StatusInvalid(const std::string& message) {
|
||||
absl::Status StatusInvalid(absl::string_view message) {
|
||||
return absl::Status(absl::StatusCode::kInvalidArgument, message);
|
||||
}
|
||||
|
||||
absl::Status StatusFail(const std::string& message) {
|
||||
absl::Status StatusFail(absl::string_view message) {
|
||||
return absl::Status(absl::StatusCode::kUnknown, message);
|
||||
}
|
||||
|
||||
|
@ -35,12 +36,12 @@ absl::Status StatusStop() {
|
|||
"mediapipe::tool::StatusStop()");
|
||||
}
|
||||
|
||||
absl::Status AddStatusPrefix(const std::string& prefix,
|
||||
absl::Status AddStatusPrefix(absl::string_view prefix,
|
||||
const absl::Status& status) {
|
||||
return absl::Status(status.code(), absl::StrCat(prefix, status.message()));
|
||||
}
|
||||
|
||||
absl::Status CombinedStatus(const std::string& general_comment,
|
||||
absl::Status CombinedStatus(absl::string_view general_comment,
|
||||
const std::vector<absl::Status>& statuses) {
|
||||
// The final error code is absl::StatusCode::kUnknown if not all
|
||||
// the error codes are the same. Otherwise it is the same error code
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "absl/base/macros.h"
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
@ -34,16 +35,16 @@ absl::Status StatusStop();
|
|||
// Return a status which signals an invalid initial condition (for
|
||||
// example an InputSidePacket does not include all necessary fields).
|
||||
ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.")
|
||||
absl::Status StatusInvalid(const std::string& error_message);
|
||||
absl::Status StatusInvalid(absl::string_view error_message);
|
||||
|
||||
// Return a status which signals that something unexpectedly failed.
|
||||
ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.")
|
||||
absl::Status StatusFail(const std::string& error_message);
|
||||
absl::Status StatusFail(absl::string_view error_message);
|
||||
|
||||
// Prefixes the given string to the error message in status.
|
||||
// This function should be considered internal to the framework.
|
||||
// TODO Replace usage of AddStatusPrefix with util::Annotate().
|
||||
absl::Status AddStatusPrefix(const std::string& prefix,
|
||||
absl::Status AddStatusPrefix(absl::string_view prefix,
|
||||
const absl::Status& status);
|
||||
|
||||
// Combine a vector of absl::Status into a single composite status.
|
||||
|
@ -51,7 +52,7 @@ absl::Status AddStatusPrefix(const std::string& prefix,
|
|||
// will be returned.
|
||||
// This function should be considered internal to the framework.
|
||||
// TODO Move this function to somewhere with less visibility.
|
||||
absl::Status CombinedStatus(const std::string& general_comment,
|
||||
absl::Status CombinedStatus(absl::string_view general_comment,
|
||||
const std::vector<absl::Status>& statuses);
|
||||
|
||||
} // namespace tool
|
||||
|
|
|
@ -15,7 +15,9 @@
|
|||
package com.google.mediapipe.components;
|
||||
|
||||
import static java.lang.Math.max;
|
||||
import static java.lang.Math.min;
|
||||
|
||||
import android.graphics.Bitmap;
|
||||
import android.graphics.SurfaceTexture;
|
||||
import android.opengl.GLES11Ext;
|
||||
import android.opengl.GLES20;
|
||||
|
@ -25,9 +27,12 @@ import android.util.Log;
|
|||
import com.google.mediapipe.framework.TextureFrame;
|
||||
import com.google.mediapipe.glutil.CommonShaders;
|
||||
import com.google.mediapipe.glutil.ShaderUtil;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import javax.microedition.khronos.egl.EGLConfig;
|
||||
import javax.microedition.khronos.opengles.GL10;
|
||||
|
@ -44,6 +49,13 @@ import javax.microedition.khronos.opengles.GL10;
|
|||
* {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}).
|
||||
*/
|
||||
public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
||||
/**
|
||||
* Listener for Bitmap capture requests.
|
||||
*/
|
||||
public interface BitmapCaptureListener {
|
||||
void onBitmapCaptured(Bitmap result);
|
||||
}
|
||||
|
||||
private static final String TAG = "DemoRenderer";
|
||||
private static final int ATTRIB_POSITION = 1;
|
||||
private static final int ATTRIB_TEXTURE_COORDINATE = 2;
|
||||
|
@ -56,12 +68,32 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
|||
private int frameUniform;
|
||||
private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES;
|
||||
private int textureTransformUniform;
|
||||
private boolean shouldFitToWidth = false;
|
||||
// Controls the alignment between frame size and surface size, 0.5f default is centered.
|
||||
private float alignmentHorizontal = 0.5f;
|
||||
private float alignmentVertical = 0.5f;
|
||||
private float[] textureTransformMatrix = new float[16];
|
||||
private SurfaceTexture surfaceTexture = null;
|
||||
private final AtomicReference<TextureFrame> nextFrame = new AtomicReference<>();
|
||||
private final AtomicBoolean captureNextFrameBitmap = new AtomicBoolean();
|
||||
private BitmapCaptureListener bitmapCaptureListener;
|
||||
|
||||
/**
|
||||
* Sets the {@link BitmapCaptureListener}.
|
||||
*/
|
||||
public void setBitmapCaptureListener(BitmapCaptureListener bitmapCaptureListener) {
|
||||
this.bitmapCaptureListener = bitmapCaptureListener;
|
||||
}
|
||||
|
||||
/**
|
||||
* Request to capture Bitmap of the next frame.
|
||||
*
|
||||
* The result will be provided to the {@link BitmapCaptureListener} if one is set. Please note
|
||||
* this is an expensive operation and the result may not be available for a while.
|
||||
*/
|
||||
public void captureNextFrameBitmap() {
|
||||
captureNextFrameBitmap.set(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSurfaceCreated(GL10 gl, EGLConfig config) {
|
||||
|
@ -147,6 +179,31 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
|||
|
||||
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
|
||||
ShaderUtil.checkGlError("glDrawArrays");
|
||||
|
||||
// Capture Bitmap if requested.
|
||||
BitmapCaptureListener bitmapCaptureListener = this.bitmapCaptureListener;
|
||||
if (captureNextFrameBitmap.getAndSet(false) && bitmapCaptureListener != null) {
|
||||
int bitmapSize = surfaceWidth * surfaceHeight;
|
||||
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bitmapSize * 4);
|
||||
byteBuffer.order(ByteOrder.nativeOrder());
|
||||
GLES20.glReadPixels(
|
||||
0, 0, surfaceWidth, surfaceHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, byteBuffer);
|
||||
int[] pixelBuffer = new int[bitmapSize];
|
||||
byteBuffer.asIntBuffer().get(pixelBuffer);
|
||||
for (int i = 0; i < bitmapSize; i++) {
|
||||
// Swap R and B channels.
|
||||
pixelBuffer[i] =
|
||||
(pixelBuffer[i] & 0xff00ff00)
|
||||
| ((pixelBuffer[i] & 0x000000ff) << 16)
|
||||
| ((pixelBuffer[i] & 0x00ff0000) >> 16);
|
||||
}
|
||||
Bitmap bitmap = Bitmap.createBitmap(surfaceWidth, surfaceHeight, Bitmap.Config.ARGB_8888);
|
||||
bitmap.setPixels(
|
||||
pixelBuffer, /* offset= */bitmapSize - surfaceWidth, /* stride= */-surfaceWidth,
|
||||
/* x= */0, /* y= */0, surfaceWidth, surfaceHeight);
|
||||
bitmapCaptureListener.onBitmapCaptured(bitmap);
|
||||
}
|
||||
|
||||
GLES20.glBindTexture(textureTarget, 0);
|
||||
ShaderUtil.checkGlError("unbind surfaceTexture");
|
||||
|
||||
|
@ -158,13 +215,17 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
|||
// TODO: compute scale from surfaceTexture size.
|
||||
float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f;
|
||||
float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f;
|
||||
// Whichever of the two scales is greater corresponds to the dimension where the image
|
||||
// is proportionally smaller than the view. Dividing both scales by that number results
|
||||
// By default whichever of the two scales is greater corresponds to the dimension where the
|
||||
// image is proportionally smaller than the view. Dividing both scales by that number results
|
||||
// in that dimension having scale 1.0, and thus touching the edges of the view, while the
|
||||
// other is cropped proportionally.
|
||||
float maxScale = max(scaleWidth, scaleHeight);
|
||||
scaleWidth /= maxScale;
|
||||
scaleHeight /= maxScale;
|
||||
// other is cropped proportionally. If shouldFitToWidth is true, use the min scale
|
||||
// if frame width is greater than frame height.
|
||||
float scale = max(scaleWidth, scaleHeight);
|
||||
if (shouldFitToWidth && (frameWidth > frameHeight)) {
|
||||
scale = min(scaleWidth, scaleHeight);
|
||||
}
|
||||
scaleWidth /= scale;
|
||||
scaleHeight /= scale;
|
||||
|
||||
// Alignment controls where the visible section is placed within the full camera frame, with
|
||||
// (0, 0) being the bottom left, and (1, 1) being the top right.
|
||||
|
@ -232,6 +293,11 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
|||
frameHeight = height;
|
||||
}
|
||||
|
||||
/** Enables or disables fit-to-width scaling, applied only when the frame width is greater than the frame height. */
|
||||
public void setShouldFitToWidth(boolean shouldFitToWidth) {
|
||||
this.shouldFitToWidth = shouldFitToWidth;
|
||||
}
|
||||
|
||||
/**
|
||||
* When the aspect ratios between the camera frame and the surface size are mismatched, this
|
||||
* controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning
|
||||
|
|
|
@ -35,7 +35,6 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto",
|
||||
"//mediapipe/tasks/cc/components/processors:embedder_options",
|
||||
"//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/components/utils:cosine_similarity",
|
||||
"//mediapipe/tasks/cc/core:base_options",
|
||||
"//mediapipe/tasks/cc/core:task_runner",
|
||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||
|
|
|
@ -29,7 +29,6 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/processors/embedder_options.h"
|
||||
#include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/components/utils/cosine_similarity.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
|
@ -147,10 +146,4 @@ absl::Status AudioEmbedder::EmbedAsync(Matrix audio_block,
|
|||
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
|
||||
}
|
||||
|
||||
absl::StatusOr<double> AudioEmbedder::CosineSimilarity(
|
||||
const components::containers::Embedding& u,
|
||||
const components::containers::Embedding& v) {
|
||||
return components::utils::CosineSimilarity(u, v);
|
||||
}
|
||||
|
||||
} // namespace mediapipe::tasks::audio::audio_embedder
|
||||
|
|
|
@ -125,16 +125,6 @@ class AudioEmbedder : core::BaseAudioTaskApi {
|
|||
|
||||
// Shuts down the AudioEmbedder when all work is done.
|
||||
absl::Status Close() { return runner_->Close(); }
|
||||
|
||||
// Utility function to compute cosine similarity [1] between two embeddings.
|
||||
// May return an InvalidArgumentError if e.g. the embeddings are of different
|
||||
// types (quantized vs. float), have different sizes, or have an L2-norm of
|
||||
// 0.
|
||||
//
|
||||
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
|
||||
static absl::StatusOr<double> CosineSimilarity(
|
||||
const components::containers::Embedding& u,
|
||||
const components::containers::Embedding& v);
|
||||
};
|
||||
|
||||
} // namespace mediapipe::tasks::audio::audio_embedder
|
||||
|
|
|
@ -54,8 +54,6 @@ constexpr char kModelWithMetadata[] = "yamnet_embedding_metadata.tflite";
|
|||
constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav";
|
||||
constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav";
|
||||
constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav";
|
||||
constexpr float kSpeechSimilarities[] = {0.985359, 0.994349, 0.993227, 0.996658,
|
||||
0.996384};
|
||||
constexpr int kMilliSecondsPerSecond = 1000;
|
||||
constexpr int kYamnetNumOfAudioSamples = 15600;
|
||||
constexpr int kYamnetAudioSampleRate = 16000;
|
||||
|
@ -163,15 +161,9 @@ TEST_F(EmbedTest, SucceedsWithSameAudioAtDifferentSampleRates) {
|
|||
audio_embedder->Embed(audio_buffer1, 16000));
|
||||
MP_ASSERT_OK_AND_ASSIGN(auto result2,
|
||||
audio_embedder->Embed(audio_buffer2, 48000));
|
||||
int expected_size = sizeof(kSpeechSimilarities) / sizeof(float);
|
||||
int expected_size = 5;
|
||||
ASSERT_EQ(result1.size(), expected_size);
|
||||
ASSERT_EQ(result2.size(), expected_size);
|
||||
for (int i = 0; i < expected_size; ++i) {
|
||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
||||
result1[i].embeddings[0],
|
||||
result2[i].embeddings[0]));
|
||||
EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
|
||||
}
|
||||
MP_EXPECT_OK(audio_embedder->Close());
|
||||
}
|
||||
|
||||
|
@ -192,10 +184,6 @@ TEST_F(EmbedTest, SucceedsWithDifferentAudios) {
|
|||
audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate));
|
||||
ASSERT_EQ(result1.size(), 5);
|
||||
ASSERT_EQ(result2.size(), 1);
|
||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
||||
result1[0].embeddings[0],
|
||||
result2[0].embeddings[0]));
|
||||
EXPECT_NEAR(similarity, 0.09017f, 1e-6);
|
||||
MP_EXPECT_OK(audio_embedder->Close());
|
||||
}
|
||||
|
||||
|
@ -258,15 +246,9 @@ TEST_F(EmbedAsyncTest, SucceedsWithSameAudioAtDifferentSampleRates) {
|
|||
RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1);
|
||||
std::vector<AudioEmbedderResult> result2;
|
||||
RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2);
|
||||
int expected_size = sizeof(kSpeechSimilarities) / sizeof(float);
|
||||
int expected_size = 5;
|
||||
ASSERT_EQ(result1.size(), expected_size);
|
||||
ASSERT_EQ(result2.size(), expected_size);
|
||||
for (int i = 0; i < expected_size; ++i) {
|
||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
||||
result1[i].embeddings[0],
|
||||
result2[i].embeddings[0]));
|
||||
EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
|
||||
|
@ -276,10 +258,6 @@ TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
|
|||
RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2);
|
||||
ASSERT_EQ(result1.size(), 5);
|
||||
ASSERT_EQ(result2.size(), 1);
|
||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
||||
result1[0].embeddings[0],
|
||||
result2[0].embeddings[0]));
|
||||
EXPECT_NEAR(similarity, 0.09017f, 1e-6);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -185,15 +185,15 @@ TEST_P(CalibrationWithoutIndicesTest, Succeeds) {
|
|||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
ScoreCalibrationCalculatorTest, CalibrationWithoutIndicesTest,
|
||||
Values(CalibrationTestParams{.score_transformation = "IDENTITY",
|
||||
.expected_results = {0.4948505976,
|
||||
0.5059588508, 0.2, 0.2}},
|
||||
Values(CalibrationTestParams{
|
||||
/* score_transformation= */ "IDENTITY",
|
||||
/* expected_results= */ {0.4948505976, 0.5059588508, 0.2, 0.2}},
|
||||
CalibrationTestParams{
|
||||
.score_transformation = "LOG",
|
||||
.expected_results = {0.2976901255, 0.3393665735, 0.2, 0.2}},
|
||||
/* score_transformation= */ "LOG",
|
||||
/* expected_results= */ {0.2976901255, 0.3393665735, 0.2, 0.2}},
|
||||
CalibrationTestParams{
|
||||
.score_transformation = "INVERSE_LOGISTIC",
|
||||
.expected_results = {0.3203217641, 0.3778080605, 0.2, 0.2}}),
|
||||
/* score_transformation= */ "INVERSE_LOGISTIC",
|
||||
/* expected_results= */ {0.3203217641, 0.3778080605, 0.2, 0.2}}),
|
||||
[](const TestParamInfo<CalibrationWithoutIndicesTest::ParamType>& info) {
|
||||
return info.param.score_transformation;
|
||||
});
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARK_H_
|
||||
|
||||
#include <cstdlib>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
|
|
@ -332,9 +332,11 @@ cc_library(
|
|||
"//mediapipe/tasks:internal",
|
||||
],
|
||||
deps = [
|
||||
":external_file_handler",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_cc_proto",
|
||||
"//mediapipe/framework/api2:builder",
|
||||
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
|
||||
"//mediapipe/tasks/metadata:metadata_schema_cc",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@flatbuffers//:runtime_cc",
|
||||
|
@ -375,6 +377,5 @@ cc_test(
|
|||
"//mediapipe/tasks/cc:common",
|
||||
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
|
||||
"//mediapipe/tasks/cc/metadata/utils:zip_utils",
|
||||
"@org_tensorflow//tensorflow/lite/c:common",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -29,7 +29,7 @@ limitations under the License.
|
|||
#include <windows.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
@ -102,9 +102,13 @@ absl::StatusOr<std::string> PathToResourceAsFile(std::string path) {
|
|||
#else
|
||||
if (absl::StartsWith(path, "./")) {
|
||||
path = "mediapipe" + path.substr(1);
|
||||
} else if (path[0] != '/') {
|
||||
path = "mediapipe/" + path;
|
||||
}
|
||||
|
||||
std::string error;
|
||||
// TODO: We should ideally use `CreateForTests` when this is
|
||||
// accessed from unit tests.
|
||||
std::unique_ptr<::bazel::tools::cpp::runfiles::Runfiles> runfiles(
|
||||
::bazel::tools::cpp::runfiles::Runfiles::Create("", &error));
|
||||
if (!runfiles) {
|
||||
|
|
|
@ -88,6 +88,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFile) {
|
|||
.status());
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
|
||||
const int model_file_descriptor = open(kTestModelBundlePath, O_RDONLY);
|
||||
auto model_file = std::make_unique<proto::ExternalFile>();
|
||||
|
@ -103,6 +104,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
|
|||
model_bundle_resources->GetModelFile("dummy_gesture_recognizer.tflite")
|
||||
.status());
|
||||
}
|
||||
#endif // _WIN32
|
||||
|
||||
TEST(ModelAssetBundleResourcesTest, CreateFromFilePointer) {
|
||||
auto file_content = LoadBinaryContent(kTestModelBundlePath);
|
||||
|
|
|
@ -136,6 +136,7 @@ TEST_F(ModelResourcesTest, CreateFromFile) {
|
|||
CheckModelResourcesPackets(model_resources.get());
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
|
||||
const int model_file_descriptor = open(kTestModelPath, O_RDONLY);
|
||||
auto model_file = std::make_unique<proto::ExternalFile>();
|
||||
|
@ -145,6 +146,7 @@ TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
|
|||
ModelResources::Create(kTestModelResourcesTag, std::move(model_file)));
|
||||
CheckModelResourcesPackets(model_resources.get());
|
||||
}
|
||||
#endif // _WIN32
|
||||
|
||||
TEST_F(ModelResourcesTest, CreateFromInvalidFile) {
|
||||
auto model_file = std::make_unique<proto::ExternalFile>();
|
||||
|
@ -168,6 +170,15 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
|
|||
auto status_or_model_resources =
|
||||
ModelResources::Create(kTestModelResourcesTag, std::move(model_file));
|
||||
|
||||
#ifdef _WIN32
|
||||
EXPECT_EQ(status_or_model_resources.status().code(),
|
||||
absl::StatusCode::kFailedPrecondition);
|
||||
EXPECT_THAT(
|
||||
status_or_model_resources.status().message(),
|
||||
testing::HasSubstr("File descriptors are not supported on Windows."));
|
||||
AssertStatusHasMediaPipeTasksStatusCode(status_or_model_resources.status(),
|
||||
MediaPipeTasksStatus::kFileReadError);
|
||||
#else
|
||||
EXPECT_EQ(status_or_model_resources.status().code(),
|
||||
absl::StatusCode::kInvalidArgument);
|
||||
EXPECT_THAT(
|
||||
|
@ -176,6 +187,7 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
|
|||
AssertStatusHasMediaPipeTasksStatusCode(
|
||||
status_or_model_resources.status(),
|
||||
MediaPipeTasksStatus::kInvalidArgumentError);
|
||||
#endif // _WIN32
|
||||
}
|
||||
|
||||
TEST_F(ModelResourcesTest, CreateFailWithCorruptedFile) {
|
||||
|
|
|
@ -23,6 +23,8 @@ limitations under the License.
|
|||
#include "absl/strings/string_view.h"
|
||||
#include "flatbuffers/flatbuffers.h"
|
||||
#include "mediapipe/calculators/core/flow_limiter_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/core/external_file_handler.h"
|
||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
|
@ -34,13 +36,11 @@ constexpr char kFlowLimiterCalculatorName[] = "FlowLimiterCalculator";
|
|||
} // namespace
|
||||
|
||||
std::string LoadBinaryContent(const char* filename) {
|
||||
std::ifstream input_file(filename, std::ios::binary | std::ios::ate);
|
||||
// Find buffer size from input file, and load the buffer.
|
||||
size_t buffer_size = input_file.tellg();
|
||||
std::string buffer(buffer_size, '\0');
|
||||
input_file.seekg(0, std::ios::beg);
|
||||
input_file.read(const_cast<char*>(buffer.c_str()), buffer_size);
|
||||
return buffer;
|
||||
proto::ExternalFile external_file;
|
||||
external_file.set_file_name(filename);
|
||||
auto file_handler =
|
||||
ExternalFileHandler::CreateFromExternalFile(&external_file);
|
||||
return std::string{(*file_handler)->GetFileContent()};
|
||||
}
|
||||
|
||||
int FindTensorIndexByMetadataName(
|
||||
|
|
|
@ -16,6 +16,7 @@ cc_test(
|
|||
"//mediapipe/framework/port:gtest_main",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/tasks/cc:common",
|
||||
"//mediapipe/tasks/cc/core:utils",
|
||||
"//mediapipe/tasks/cc/metadata:metadata_extractor",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
|
|
|
@ -25,12 +25,14 @@ limitations under the License.
|
|||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
#include "mediapipe/tasks/cc/common.h"
|
||||
#include "mediapipe/tasks/cc/core/utils.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace metadata {
|
||||
namespace {
|
||||
|
||||
using core::LoadBinaryContent;
|
||||
using ::testing::Optional;
|
||||
|
||||
constexpr char kTestDataDirectory[] = "mediapipe/tasks/testdata/metadata";
|
||||
|
@ -53,8 +55,8 @@ constexpr char kRandomTextFile[] = "external_file";
|
|||
|
||||
absl::StatusOr<std::unique_ptr<ModelMetadataExtractor>> CreateMetadataExtractor(
|
||||
std::string model_name, std::string* file_contents) {
|
||||
MP_RETURN_IF_ERROR(file::GetContents(
|
||||
file::JoinPath("./", kTestDataDirectory, model_name), file_contents));
|
||||
*file_contents = LoadBinaryContent(
|
||||
file::JoinPath("./", kTestDataDirectory, model_name).c_str());
|
||||
return ModelMetadataExtractor::CreateFromModelBuffer(file_contents->data(),
|
||||
file_contents->length());
|
||||
}
|
||||
|
|
|
@ -26,7 +26,11 @@ using ::testing::MatchesRegex;
|
|||
|
||||
TEST(MetadataParserTest, MatadataParserVersionIsWellFormed) {
|
||||
// Validates that the version is well-formed (x.y.z).
|
||||
#ifdef _WIN32
|
||||
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
|
||||
#else
|
||||
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
|
||||
#endif // _WIN32
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -83,7 +83,11 @@ TEST(MetadataVersionTest,
|
|||
builder.GetSize(), &min_version),
|
||||
kTfLiteOk);
|
||||
// Validates that the version is well-formed (x.y.z).
|
||||
#ifdef _WIN32
|
||||
EXPECT_THAT(min_version, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
|
||||
#else
|
||||
EXPECT_THAT(min_version, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
|
||||
#endif // _WIN32
|
||||
}
|
||||
|
||||
TEST(MetadataVersionTest,
|
||||
|
|
49
mediapipe/tasks/cc/vision/face_geometry/calculators/BUILD
Normal file
49
mediapipe/tasks/cc/vision/face_geometry/calculators/BUILD
Normal file
|
@ -0,0 +1,49 @@
|
|||
# Copyright 2023 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "geometry_pipeline_calculator_proto",
|
||||
srcs = ["geometry_pipeline_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "geometry_pipeline_calculator",
|
||||
srcs = ["geometry_pipeline_calculator.cc"],
|
||||
deps = [
|
||||
":geometry_pipeline_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/libs:geometry_pipeline",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/libs:validation_utils",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
|
||||
"//mediapipe/util:resource_util",
|
||||
"@com_google_absl//absl/memory",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
|
@ -0,0 +1,194 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||
#include "mediapipe/util/resource_util.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
namespace {
|
||||
|
||||
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
|
||||
static constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
|
||||
static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS";
|
||||
|
||||
using ::mediapipe::tasks::vision::face_geometry::proto::Environment;
|
||||
using ::mediapipe::tasks::vision::face_geometry::proto::FaceGeometry;
|
||||
using ::mediapipe::tasks::vision::face_geometry::proto::
|
||||
GeometryPipelineMetadata;
|
||||
|
||||
// A calculator that estimates face geometry for multiple faces.
|
||||
//
|
||||
// Inputs:
|
||||
// IMAGE_SIZE (`std::pair<int, int>`, required):
|
||||
// The size of the current frame. The first element of the pair is the frame
|
||||
// width; the other one is the frame height.
|
||||
//
|
||||
// The face landmarks should have been detected on a frame with the same
|
||||
// ratio. If used as-is, the resulting face geometry visualization should be
|
||||
// happening on a frame with the same ratio as well.
|
||||
//
|
||||
// MULTI_FACE_LANDMARKS (`std::vector<NormalizedLandmarkList>`, required):
|
||||
// A vector of face landmark lists.
|
||||
//
|
||||
// Input side packets:
|
||||
// ENVIRONMENT (`proto::Environment`, required)
|
||||
// Describes an environment; includes the camera frame origin point location
|
||||
// as well as virtual camera parameters.
|
||||
//
|
||||
// Output:
|
||||
// MULTI_FACE_GEOMETRY (`std::vector<FaceGeometry>`, required):
|
||||
// A vector of face geometry data.
|
||||
//
|
||||
// Options:
|
||||
// metadata_path (`string`, optional):
|
||||
// Defines a path for the geometry pipeline metadata file.
|
||||
//
|
||||
// The geometry pipeline metadata file format must be the binary
|
||||
// `GeometryPipelineMetadata` proto.
|
||||
//
|
||||
class GeometryPipelineCalculator : public CalculatorBase {
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc) {
|
||||
cc->InputSidePackets().Tag(kEnvironmentTag).Set<Environment>();
|
||||
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
|
||||
cc->Inputs()
|
||||
.Tag(kMultiFaceLandmarksTag)
|
||||
.Set<std::vector<mediapipe::NormalizedLandmarkList>>();
|
||||
cc->Outputs().Tag(kMultiFaceGeometryTag).Set<std::vector<FaceGeometry>>();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(mediapipe::TimestampDiff(0));
|
||||
|
||||
const auto& options = cc->Options<FaceGeometryPipelineCalculatorOptions>();
|
||||
|
||||
ASSIGN_OR_RETURN(
|
||||
GeometryPipelineMetadata metadata,
|
||||
ReadMetadataFromFile(options.metadata_path()),
|
||||
_ << "Failed to read the geometry pipeline metadata from file!");
|
||||
|
||||
MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
|
||||
<< "Invalid geometry pipeline metadata!";
|
||||
|
||||
const Environment& environment =
|
||||
cc->InputSidePackets().Tag(kEnvironmentTag).Get<Environment>();
|
||||
|
||||
MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
|
||||
<< "Invalid environment!";
|
||||
|
||||
ASSIGN_OR_RETURN(geometry_pipeline_,
|
||||
CreateGeometryPipeline(environment, metadata),
|
||||
_ << "Failed to create a geometry pipeline!");
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override {
|
||||
// Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required
|
||||
// to have a non-empty packet. In case this requirement is not met, there's
|
||||
// nothing to be processed at the current timestamp.
|
||||
if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() ||
|
||||
cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
const auto& image_size =
|
||||
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
|
||||
const auto& multi_face_landmarks =
|
||||
cc->Inputs()
|
||||
.Tag(kMultiFaceLandmarksTag)
|
||||
.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
|
||||
|
||||
auto multi_face_geometry = absl::make_unique<std::vector<FaceGeometry>>();
|
||||
|
||||
ASSIGN_OR_RETURN(
|
||||
*multi_face_geometry,
|
||||
geometry_pipeline_->EstimateFaceGeometry(
|
||||
multi_face_landmarks, //
|
||||
/*frame_width*/ image_size.first,
|
||||
/*frame_height*/ image_size.second),
|
||||
_ << "Failed to estimate face geometry for multiple faces!");
|
||||
|
||||
cc->Outputs()
|
||||
.Tag(kMultiFaceGeometryTag)
|
||||
.AddPacket(mediapipe::Adopt<std::vector<FaceGeometry>>(
|
||||
multi_face_geometry.release())
|
||||
.At(cc->InputTimestamp()));
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Close(CalculatorContext* cc) override {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
private:
|
||||
static absl::StatusOr<GeometryPipelineMetadata> ReadMetadataFromFile(
|
||||
const std::string& metadata_path) {
|
||||
ASSIGN_OR_RETURN(std::string metadata_blob,
|
||||
ReadContentBlobFromFile(metadata_path),
|
||||
_ << "Failed to read a metadata blob from file!");
|
||||
|
||||
GeometryPipelineMetadata metadata;
|
||||
RET_CHECK(metadata.ParseFromString(metadata_blob))
|
||||
<< "Failed to parse a metadata proto from a binary blob!";
|
||||
|
||||
return metadata;
|
||||
}
|
||||
|
||||
static absl::StatusOr<std::string> ReadContentBlobFromFile(
|
||||
const std::string& unresolved_path) {
|
||||
ASSIGN_OR_RETURN(std::string resolved_path,
|
||||
mediapipe::PathToResourceAsFile(unresolved_path),
|
||||
_ << "Failed to resolve path! Path = " << unresolved_path);
|
||||
|
||||
std::string content_blob;
|
||||
MP_RETURN_IF_ERROR(
|
||||
mediapipe::GetResourceContents(resolved_path, &content_blob))
|
||||
<< "Failed to read content blob! Resolved path = " << resolved_path;
|
||||
|
||||
return content_blob;
|
||||
}
|
||||
|
||||
std::unique_ptr<GeometryPipeline> geometry_pipeline_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// The calculator class is defined in the anonymous namespace closed above;
// this alias gives it the name under which it is registered below.
using FaceGeometryPipelineCalculator = GeometryPipelineCalculator;
|
||||
|
||||
// Register the calculator under its fully qualified alias name.
REGISTER_CALCULATOR(
|
||||
::mediapipe::tasks::vision::face_geometry::FaceGeometryPipelineCalculator);
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.face_geometry;
|
||||
|
||||
import "mediapipe/framework/calculator_options.proto";
|
||||
|
||||
// Options for the face geometry pipeline calculator, attached as an extension
// of `mediapipe.CalculatorOptions`.
message FaceGeometryPipelineCalculatorOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional FaceGeometryPipelineCalculatorOptions ext = 512499200;
|
||||
}
|
||||
|
||||
// Path to the geometry pipeline metadata file. The file must contain a
// binary-serialized `GeometryPipelineMetadata` proto.
optional string metadata_path = 1;
|
||||
}
|
59
mediapipe/tasks/cc/vision/face_geometry/data/BUILD
Normal file
59
mediapipe/tasks/cc/vision/face_geometry/data/BUILD
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Copyright 2023 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
encode_binary_proto(
|
||||
name = "geometry_pipeline_metadata_detection",
|
||||
input = "geometry_pipeline_metadata_detection.pbtxt",
|
||||
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
|
||||
output = "geometry_pipeline_metadata_detection.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
|
||||
],
|
||||
)
|
||||
|
||||
encode_binary_proto(
|
||||
name = "geometry_pipeline_metadata_landmarks",
|
||||
input = "geometry_pipeline_metadata_landmarks.pbtxt",
|
||||
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
|
||||
output = "geometry_pipeline_metadata_landmarks.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
|
||||
],
|
||||
)
|
||||
|
||||
# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from
|
||||
# the `geometry_pipeline_metadata_landmarks.pbtxt` definition.
|
||||
encode_binary_proto(
|
||||
name = "geometry_pipeline_metadata",
|
||||
input = "geometry_pipeline_metadata_landmarks.pbtxt",
|
||||
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
|
||||
output = "geometry_pipeline_metadata.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
|
||||
],
|
||||
)
|
||||
|
||||
# These canonical face model files are not meant to be used at runtime, but rather for asset
|
||||
# creation and/or reference.
|
||||
exports_files([
|
||||
"canonical_face_model.fbx",
|
||||
"canonical_face_model.obj",
|
||||
"canonical_face_model_uv_visualization.png",
|
||||
])
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
After Width: | Height: | Size: 731 KiB |
|
@ -0,0 +1,78 @@
|
|||
# Copyright 2023 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
input_source: FACE_DETECTION_PIPELINE
|
||||
procrustes_landmark_basis { landmark_id: 0 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 1 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 2 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 3 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 4 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 5 weight: 1.0 }
|
||||
# NOTE: the triangular topology of the face meshes is only useful when derived
|
||||
# from the 468 face landmarks, not from the 6 face detection landmarks
|
||||
# (keypoints). The latter don't cover the entire face and this mesh is
|
||||
# defined here only to comply with the API. It should be considered as
|
||||
# a placeholder and/or for debugging purposes.
|
||||
#
|
||||
# Use the face geometry derived from the face detection landmarks
|
||||
# (keypoints) for the face pose transformation matrix, not the mesh.
|
||||
canonical_mesh: {
|
||||
vertex_type: VERTEX_PT
|
||||
primitive_type: TRIANGLE
|
||||
vertex_buffer: -3.1511454582214355
|
||||
vertex_buffer: 2.6246179342269897
|
||||
vertex_buffer: 3.4656630754470825
|
||||
vertex_buffer: 0.349575996398926
|
||||
vertex_buffer: 0.38137748837470997
|
||||
vertex_buffer: 3.1511454582214355
|
||||
vertex_buffer: 2.6246179342269897
|
||||
vertex_buffer: 3.4656630754470825
|
||||
vertex_buffer: 0.650443494319916
|
||||
vertex_buffer: 0.38137999176979054
|
||||
vertex_buffer: 0.0
|
||||
vertex_buffer: -1.126865029335022
|
||||
vertex_buffer: 7.475604057312012
|
||||
vertex_buffer: 0.500025987625122
|
||||
vertex_buffer: 0.547487020492554
|
||||
vertex_buffer: 0.0
|
||||
vertex_buffer: -4.304508209228516
|
||||
vertex_buffer: 4.162498950958252
|
||||
vertex_buffer: 0.499989986419678
|
||||
vertex_buffer: 0.694203019142151
|
||||
vertex_buffer: -7.664182186126709
|
||||
vertex_buffer: 0.673132002353668
|
||||
vertex_buffer: -2.435867071151733
|
||||
vertex_buffer: 0.007561000064015
|
||||
vertex_buffer: 0.480777025222778
|
||||
vertex_buffer: 7.664182186126709
|
||||
vertex_buffer: 0.673132002353668
|
||||
vertex_buffer: -2.435867071151733
|
||||
vertex_buffer: 0.992439985275269
|
||||
vertex_buffer: 0.480777025222778
|
||||
index_buffer: 0
|
||||
index_buffer: 1
|
||||
index_buffer: 2
|
||||
index_buffer: 1
|
||||
index_buffer: 5
|
||||
index_buffer: 2
|
||||
index_buffer: 4
|
||||
index_buffer: 0
|
||||
index_buffer: 2
|
||||
index_buffer: 4
|
||||
index_buffer: 2
|
||||
index_buffer: 3
|
||||
index_buffer: 2
|
||||
index_buffer: 5
|
||||
index_buffer: 3
|
||||
}
|
File diff suppressed because it is too large
Load Diff
80
mediapipe/tasks/cc/vision/face_geometry/libs/BUILD
Normal file
80
mediapipe/tasks/cc/vision/face_geometry/libs/BUILD
Normal file
|
@ -0,0 +1,80 @@
|
|||
# Copyright 2023 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "geometry_pipeline",
|
||||
srcs = ["geometry_pipeline.cc"],
|
||||
hdrs = ["geometry_pipeline.h"],
|
||||
deps = [
|
||||
":mesh_3d_utils",
|
||||
":procrustes_solver",
|
||||
":validation_utils",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:matrix",
|
||||
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mesh_3d_utils",
|
||||
srcs = ["mesh_3d_utils.cc"],
|
||||
hdrs = ["mesh_3d_utils.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "procrustes_solver",
|
||||
srcs = ["procrustes_solver.cc"],
|
||||
hdrs = ["procrustes_solver.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "validation_utils",
|
||||
srcs = ["validation_utils.cc"],
|
||||
hdrs = ["validation_utils.h"],
|
||||
deps = [
|
||||
":mesh_3d_utils",
|
||||
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,471 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
|
||||
|
||||
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/matrix.h"
|
||||
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
namespace {
|
||||
|
||||
struct PerspectiveCameraFrustum {
|
||||
// NOTE: all arguments must be validated prior to calling this constructor.
|
||||
PerspectiveCameraFrustum(const proto::PerspectiveCamera& perspective_camera,
|
||||
int frame_width, int frame_height) {
|
||||
static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;
|
||||
|
||||
const float height_at_near =
|
||||
2.f * perspective_camera.near() *
|
||||
std::tan(0.5f * kDegreesToRadians *
|
||||
perspective_camera.vertical_fov_degrees());
|
||||
|
||||
const float width_at_near = frame_width * height_at_near / frame_height;
|
||||
|
||||
left = -0.5f * width_at_near;
|
||||
right = 0.5f * width_at_near;
|
||||
bottom = -0.5f * height_at_near;
|
||||
top = 0.5f * height_at_near;
|
||||
near = perspective_camera.near();
|
||||
far = perspective_camera.far();
|
||||
}
|
||||
|
||||
float left;
|
||||
float right;
|
||||
float bottom;
|
||||
float top;
|
||||
float near;
|
||||
float far;
|
||||
};
|
||||
|
||||
class ScreenToMetricSpaceConverter {
|
||||
public:
|
||||
ScreenToMetricSpaceConverter(
|
||||
proto::OriginPointLocation origin_point_location, //
|
||||
proto::InputSource input_source, //
|
||||
Eigen::Matrix3Xf&& canonical_metric_landmarks, //
|
||||
Eigen::VectorXf&& landmark_weights, //
|
||||
std::unique_ptr<ProcrustesSolver> procrustes_solver)
|
||||
: origin_point_location_(origin_point_location),
|
||||
input_source_(input_source),
|
||||
canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
|
||||
landmark_weights_(std::move(landmark_weights)),
|
||||
procrustes_solver_(std::move(procrustes_solver)) {}
|
||||
|
||||
// Converts `screen_landmark_list` into `metric_landmark_list` and estimates
|
||||
// the `pose_transform_mat`.
|
||||
//
|
||||
// Here's the algorithm summary:
|
||||
//
|
||||
// (1) Project X- and Y- screen landmark coordinates at the Z near plane.
|
||||
//
|
||||
// (2) Estimate a canonical-to-runtime landmark set scale by running the
|
||||
// Procrustes solver using the screen runtime landmarks.
|
||||
//
|
||||
// On this iteration, screen landmarks are used instead of unprojected
|
||||
// metric landmarks as it is not safe to unproject due to the relative
|
||||
// nature of the input screen landmark Z coordinate.
|
||||
//
|
||||
// (3) Use the canonical-to-runtime scale from (2) to unproject the screen
|
||||
// landmarks. The result is referenced as "intermediate landmarks" because
|
||||
// they are the first estimation of the resuling metric landmarks, but are
|
||||
// not quite there yet.
|
||||
//
|
||||
// (4) Estimate a canonical-to-runtime landmark set scale by running the
|
||||
// Procrustes solver using the intermediate runtime landmarks.
|
||||
//
|
||||
// (5) Use the product of the scale factors from (2) and (4) to unproject
|
||||
// the screen landmarks the second time. This is the second and the final
|
||||
// estimation of the metric landmarks.
|
||||
//
|
||||
// (6) Multiply each of the metric landmarks by the inverse pose
|
||||
// transformation matrix to align the runtime metric face landmarks with
|
||||
// the canonical metric face landmarks.
|
||||
//
|
||||
// Note: the input screen landmarks are in the left-handed coordinate system,
|
||||
// however any metric landmarks - including the canonical metric
|
||||
// landmarks, the final runtime metric landmarks and any intermediate
|
||||
// runtime metric landmarks - are in the right-handed coordinate system.
|
||||
//
|
||||
// To keep the logic correct, the landmark set handedness is changed any
|
||||
// time the screen-to-metric semantic barrier is passed.
|
||||
absl::Status Convert(
|
||||
const mediapipe::NormalizedLandmarkList& screen_landmark_list, //
|
||||
const PerspectiveCameraFrustum& pcf, //
|
||||
mediapipe::LandmarkList& metric_landmark_list, //
|
||||
Eigen::Matrix4f& pose_transform_mat) const {
|
||||
RET_CHECK_EQ(screen_landmark_list.landmark_size(),
|
||||
canonical_metric_landmarks_.cols())
|
||||
<< "The number of landmarks doesn't match the number passed upon "
|
||||
"initialization!";
|
||||
|
||||
Eigen::Matrix3Xf screen_landmarks;
|
||||
ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);
|
||||
|
||||
ProjectXY(pcf, screen_landmarks);
|
||||
const float depth_offset = screen_landmarks.row(2).mean();
|
||||
|
||||
// 1st iteration: don't unproject XY because it's unsafe to do so due to
|
||||
// the relative nature of the Z coordinate. Instead, run the
|
||||
// first estimation on the projected XY and use that scale to
|
||||
// unproject for the 2nd iteration.
|
||||
Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
|
||||
ChangeHandedness(intermediate_landmarks);
|
||||
|
||||
ASSIGN_OR_RETURN(const float first_iteration_scale,
|
||||
EstimateScale(intermediate_landmarks),
|
||||
_ << "Failed to estimate first iteration scale!");
|
||||
|
||||
// 2nd iteration: unproject XY using the scale from the 1st iteration.
|
||||
intermediate_landmarks = screen_landmarks;
|
||||
MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
|
||||
intermediate_landmarks);
|
||||
UnprojectXY(pcf, intermediate_landmarks);
|
||||
ChangeHandedness(intermediate_landmarks);
|
||||
|
||||
// For face detection input landmarks, re-write Z-coord from the canonical
|
||||
// landmarks.
|
||||
if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
|
||||
Eigen::Matrix4f intermediate_pose_transform_mat;
|
||||
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||
canonical_metric_landmarks_, intermediate_landmarks,
|
||||
landmark_weights_, intermediate_pose_transform_mat))
|
||||
<< "Failed to estimate pose transform matrix!";
|
||||
|
||||
intermediate_landmarks.row(2) =
|
||||
(intermediate_pose_transform_mat *
|
||||
canonical_metric_landmarks_.colwise().homogeneous())
|
||||
.row(2);
|
||||
}
|
||||
ASSIGN_OR_RETURN(const float second_iteration_scale,
|
||||
EstimateScale(intermediate_landmarks),
|
||||
_ << "Failed to estimate second iteration scale!");
|
||||
|
||||
// Use the total scale to unproject the screen landmarks.
|
||||
const float total_scale = first_iteration_scale * second_iteration_scale;
|
||||
MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
|
||||
UnprojectXY(pcf, screen_landmarks);
|
||||
ChangeHandedness(screen_landmarks);
|
||||
|
||||
// At this point, screen landmarks are converted into metric landmarks.
|
||||
Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;
|
||||
|
||||
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
|
||||
pose_transform_mat))
|
||||
<< "Failed to estimate pose transform matrix!";
|
||||
|
||||
// For face detection input landmarks, re-write Z-coord from the canonical
|
||||
// landmarks and run the pose transform estimation again.
|
||||
if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
|
||||
metric_landmarks.row(2) =
|
||||
(pose_transform_mat *
|
||||
canonical_metric_landmarks_.colwise().homogeneous())
|
||||
.row(2);
|
||||
|
||||
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
|
||||
pose_transform_mat))
|
||||
<< "Failed to estimate pose transform matrix!";
|
||||
}
|
||||
|
||||
// Multiply each of the metric landmarks by the inverse pose
|
||||
// transformation matrix to align the runtime metric face landmarks with
|
||||
// the canonical metric face landmarks.
|
||||
metric_landmarks = (pose_transform_mat.inverse() *
|
||||
metric_landmarks.colwise().homogeneous())
|
||||
.topRows(3);
|
||||
|
||||
ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
private:
|
||||
void ProjectXY(const PerspectiveCameraFrustum& pcf,
|
||||
Eigen::Matrix3Xf& landmarks) const {
|
||||
float x_scale = pcf.right - pcf.left;
|
||||
float y_scale = pcf.top - pcf.bottom;
|
||||
float x_translation = pcf.left;
|
||||
float y_translation = pcf.bottom;
|
||||
|
||||
if (origin_point_location_ == proto::OriginPointLocation::TOP_LEFT_CORNER) {
|
||||
landmarks.row(1) = 1.f - landmarks.row(1).array();
|
||||
}
|
||||
|
||||
landmarks =
|
||||
landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
|
||||
landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
|
||||
}
|
||||
|
||||
absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
|
||||
Eigen::Matrix4f transform_mat;
|
||||
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||
canonical_metric_landmarks_, landmarks, landmark_weights_,
|
||||
transform_mat))
|
||||
<< "Failed to estimate canonical-to-runtime landmark set transform!";
|
||||
|
||||
return transform_mat.col(0).norm();
|
||||
}
|
||||
|
||||
static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
|
||||
float depth_offset, float scale,
|
||||
Eigen::Matrix3Xf& landmarks) {
|
||||
landmarks.row(2) =
|
||||
(landmarks.array().row(2) - depth_offset + pcf.near) / scale;
|
||||
}
|
||||
|
||||
static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
|
||||
Eigen::Matrix3Xf& landmarks) {
|
||||
landmarks.row(0) =
|
||||
landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
|
||||
landmarks.row(1) =
|
||||
landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
|
||||
}
|
||||
|
||||
static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
|
||||
landmarks.row(2) *= -1.f;
|
||||
}
|
||||
|
||||
static void ConvertLandmarkListToEigenMatrix(
|
||||
const mediapipe::NormalizedLandmarkList& landmark_list,
|
||||
Eigen::Matrix3Xf& eigen_matrix) {
|
||||
eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
|
||||
for (int i = 0; i < landmark_list.landmark_size(); ++i) {
|
||||
const auto& landmark = landmark_list.landmark(i);
|
||||
eigen_matrix(0, i) = landmark.x();
|
||||
eigen_matrix(1, i) = landmark.y();
|
||||
eigen_matrix(2, i) = landmark.z();
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertEigenMatrixToLandmarkList(
|
||||
const Eigen::Matrix3Xf& eigen_matrix,
|
||||
mediapipe::LandmarkList& landmark_list) {
|
||||
landmark_list.Clear();
|
||||
|
||||
for (int i = 0; i < eigen_matrix.cols(); ++i) {
|
||||
auto& landmark = *landmark_list.add_landmark();
|
||||
landmark.set_x(eigen_matrix(0, i));
|
||||
landmark.set_y(eigen_matrix(1, i));
|
||||
landmark.set_z(eigen_matrix(2, i));
|
||||
}
|
||||
}
|
||||
|
||||
const proto::OriginPointLocation origin_point_location_;
|
||||
const proto::InputSource input_source_;
|
||||
Eigen::Matrix3Xf canonical_metric_landmarks_;
|
||||
Eigen::VectorXf landmark_weights_;
|
||||
|
||||
std::unique_ptr<ProcrustesSolver> procrustes_solver_;
|
||||
};
|
||||
|
||||
class GeometryPipelineImpl : public GeometryPipeline {
|
||||
public:
|
||||
GeometryPipelineImpl(
|
||||
const proto::PerspectiveCamera& perspective_camera, //
|
||||
const proto::Mesh3d& canonical_mesh, //
|
||||
uint32_t canonical_mesh_vertex_size, //
|
||||
uint32_t canonical_mesh_num_vertices,
|
||||
uint32_t canonical_mesh_vertex_position_offset,
|
||||
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
|
||||
: perspective_camera_(perspective_camera),
|
||||
canonical_mesh_(canonical_mesh),
|
||||
canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
|
||||
canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
|
||||
canonical_mesh_vertex_position_offset_(
|
||||
canonical_mesh_vertex_position_offset),
|
||||
space_converter_(std::move(space_converter)) {}
|
||||
|
||||
absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
|
||||
const std::vector<mediapipe::NormalizedLandmarkList>&
|
||||
multi_face_landmarks,
|
||||
int frame_width, int frame_height) const override {
|
||||
MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
|
||||
<< "Invalid frame dimensions!";
|
||||
|
||||
// Create a perspective camera frustum to be shared for geometry estimation
|
||||
// per each face.
|
||||
PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
|
||||
frame_height);
|
||||
|
||||
std::vector<proto::FaceGeometry> multi_face_geometry;
|
||||
|
||||
// From this point, the meaning of "face landmarks" is clarified further as
|
||||
// "screen face landmarks". This is done do distinguish from "metric face
|
||||
// landmarks" that are derived during the face geometry estimation process.
|
||||
for (const mediapipe::NormalizedLandmarkList& screen_face_landmarks :
|
||||
multi_face_landmarks) {
|
||||
// Having a too compact screen landmark list will result in numerical
|
||||
// instabilities, therefore such faces are filtered.
|
||||
if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Convert the screen landmarks into the metric landmarks and get the pose
|
||||
// transformation matrix.
|
||||
mediapipe::LandmarkList metric_face_landmarks;
|
||||
Eigen::Matrix4f pose_transform_mat;
|
||||
MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
|
||||
metric_face_landmarks,
|
||||
pose_transform_mat))
|
||||
<< "Failed to convert landmarks from the screen to the metric space!";
|
||||
|
||||
// Pack geometry data for this face.
|
||||
proto::FaceGeometry face_geometry;
|
||||
proto::Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
|
||||
// Copy the canonical face mesh as the face geometry mesh.
|
||||
mutable_mesh->CopyFrom(canonical_mesh_);
|
||||
// Replace XYZ vertex mesh coodinates with the metric landmark positions.
|
||||
for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
|
||||
uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
|
||||
canonical_mesh_vertex_position_offset_;
|
||||
|
||||
mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
|
||||
metric_face_landmarks.landmark(i).x());
|
||||
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
|
||||
metric_face_landmarks.landmark(i).y());
|
||||
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
|
||||
metric_face_landmarks.landmark(i).z());
|
||||
}
|
||||
// Populate the face pose transformation matrix.
|
||||
mediapipe::MatrixDataProtoFromMatrix(
|
||||
pose_transform_mat, face_geometry.mutable_pose_transform_matrix());
|
||||
|
||||
multi_face_geometry.push_back(face_geometry);
|
||||
}
|
||||
|
||||
return multi_face_geometry;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool IsScreenLandmarkListTooCompact(
|
||||
const mediapipe::NormalizedLandmarkList& screen_landmarks) {
|
||||
float mean_x = 0.f;
|
||||
float mean_y = 0.f;
|
||||
for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
|
||||
const auto& landmark = screen_landmarks.landmark(i);
|
||||
mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
|
||||
mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
|
||||
}
|
||||
|
||||
float max_sq_dist = 0.f;
|
||||
for (const auto& landmark : screen_landmarks.landmark()) {
|
||||
const float d_x = landmark.x() - mean_x;
|
||||
const float d_y = landmark.y() - mean_y;
|
||||
max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
|
||||
}
|
||||
|
||||
static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
|
||||
return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
|
||||
}
|
||||
|
||||
const proto::PerspectiveCamera perspective_camera_;
|
||||
const proto::Mesh3d canonical_mesh_;
|
||||
const uint32_t canonical_mesh_vertex_size_;
|
||||
const uint32_t canonical_mesh_num_vertices_;
|
||||
const uint32_t canonical_mesh_vertex_position_offset_;
|
||||
|
||||
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Builds a `GeometryPipeline` from the given environment and metadata.
//
// Returns an error status if either argument fails validation or if the
// canonical mesh lacks the `POSITION` or `TEX_COORD` vertex component.
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
    const proto::Environment& environment,
    const proto::GeometryPipelineMetadata& metadata) {
  MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
      << "Invalid environment!";
  MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
      << "Invalid geometry pipeline metadata!";

  const auto& canonical_mesh = metadata.canonical_mesh();
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION))
      << "Canonical face mesh must have the `POSITION` vertex component!";
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::TEX_COORD))
      << "Canonical face mesh must have the `TEX_COORD` vertex component!";

  uint32_t canonical_mesh_vertex_size =
      GetVertexSize(canonical_mesh.vertex_type());
  uint32_t canonical_mesh_num_vertices =
      canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
  // `.value()` is not expected to fail here: the presence of the `POSITION`
  // component was checked just above.
  uint32_t canonical_mesh_vertex_position_offset =
      GetVertexComponentOffset(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION)
          .value();

  // Put the Procrustes landmark basis into Eigen matrices for an easier access.
  Eigen::Matrix3Xf canonical_metric_landmarks =
      Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
  Eigen::VectorXf landmark_weights =
      Eigen::VectorXf::Zero(canonical_mesh_num_vertices);

  // The counter is unsigned to match the type of the vertex count it is
  // compared against (avoids a signed/unsigned comparison).
  for (uint32_t i = 0; i < canonical_mesh_num_vertices; ++i) {
    uint32_t vertex_buffer_offset =
        canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;

    canonical_metric_landmarks(0, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset);
    canonical_metric_landmarks(1, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
    canonical_metric_landmarks(2, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
  }

  // Landmarks that are not part of the Procrustes basis keep a zero weight.
  for (const proto::WeightedLandmarkRef& wlr :
       metadata.procrustes_landmark_basis()) {
    uint32_t landmark_id = wlr.landmark_id();
    landmark_weights(landmark_id) = wlr.weight();
  }

  std::unique_ptr<GeometryPipeline> result =
      absl::make_unique<GeometryPipelineImpl>(
          environment.perspective_camera(), canonical_mesh,
          canonical_mesh_vertex_size, canonical_mesh_num_vertices,
          canonical_mesh_vertex_position_offset,
          absl::make_unique<ScreenToMetricSpaceConverter>(
              environment.origin_point_location(),
              // An unspecified (DEFAULT) input source is treated as the face
              // landmark pipeline.
              metadata.input_source() == proto::InputSource::DEFAULT
                  ? proto::InputSource::FACE_LANDMARK_PIPELINE
                  : metadata.input_source(),
              std::move(canonical_metric_landmarks),
              std::move(landmark_weights),
              CreateFloatPrecisionProcrustesSolver()));

  return result;
}
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
|
||||
// Encapsulates a stateless estimator of facial geometry in a Metric space based
|
||||
// on the normalized face landmarks in the Screen space.
|
||||
class GeometryPipeline {
|
||||
public:
|
||||
virtual ~GeometryPipeline() = default;
|
||||
|
||||
// Estimates geometry data for multiple faces.
|
||||
//
|
||||
// Returns an error status if any of the passed arguments is invalid.
|
||||
//
|
||||
// The result includes face geometry data for a subset of the input faces,
|
||||
// however geometry data for some faces might be missing. This may happen if
|
||||
// it'd be unstable to estimate the facial geometry based on a corresponding
|
||||
// face landmark list for any reason (for example, if the landmark list is too
|
||||
// compact).
|
||||
//
|
||||
// Each face landmark list must have the same number of landmarks as was
|
||||
// passed upon initialization via the canonical face mesh (as a part of the
|
||||
// geometry pipeline metadata).
|
||||
//
|
||||
// Both `frame_width` and `frame_height` must be positive.
|
||||
virtual absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
|
||||
const std::vector<mediapipe::NormalizedLandmarkList>&
|
||||
multi_face_landmarks,
|
||||
int frame_width, int frame_height) const = 0;
|
||||
};
|
||||
|
||||
// Creates an instance of `GeometryPipeline`.
|
||||
//
|
||||
// Both `environment` and `metadata` must be valid (for details, please refer to
|
||||
// the proto message definition comments and/or `validation_utils.h/cc`).
|
||||
//
|
||||
// Canonical face mesh (defined as a part of `metadata`) must have the
|
||||
// `POSITION` and the `TEX_COORD` vertex components.
|
||||
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
|
||||
const proto::Environment& environment,
|
||||
const proto::GeometryPipelineMetadata& metadata);
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
103
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.cc
Normal file
103
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.cc
Normal file
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
namespace {
|
||||
|
||||
// Tells whether the VERTEX_PT layout contains `vertex_component`: true for
// `POSITION` and `TEX_COORD`, false for any other value.
bool HasVertexComponentVertexPT(VertexComponent vertex_component) {
  return vertex_component == VertexComponent::POSITION ||
         vertex_component == VertexComponent::TEX_COORD;
}
|
||||
|
||||
// Returns the number of buffer elements `vertex_component` occupies in a
// VERTEX_PT vertex: 3 for `POSITION`, 2 for `TEX_COORD`.
//
// Returns 0 for a value that is not a declared `VertexComponent` enumerator.
uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) {
  // No `default:` label on purpose, so the compiler can still flag an
  // enumerator that is added later and not handled here.
  switch (vertex_component) {
    case VertexComponent::POSITION:
      return 3;
    case VertexComponent::TEX_COORD:
      return 2;
  }

  // Reached only for an out-of-range enum value; without this return the
  // function would flow off its end, which is undefined behavior.
  return 0;
}
|
||||
|
||||
// Returns the offset (in buffer elements) of `vertex_component` inside a
// VERTEX_PT vertex: `POSITION` comes first, `TEX_COORD` follows it.
//
// Returns 0 for a value that is not a declared `VertexComponent` enumerator.
uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) {
  // No `default:` label on purpose, so the compiler can still flag an
  // enumerator that is added later and not handled here.
  switch (vertex_component) {
    case VertexComponent::POSITION:
      return 0;
    case VertexComponent::TEX_COORD:
      return GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
  }

  // Reached only for an out-of-range enum value; without this return the
  // function would flow off its end, which is undefined behavior.
  return 0;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Returns the number of vertex buffer elements taken by one vertex of the
// given type. For VERTEX_PT this is the position size plus the texture
// coordinate size.
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type) {
  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT: {
      const uint32_t position_size =
          GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
      const uint32_t tex_coord_size =
          GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD);
      return position_size + tex_coord_size;
    }
  }
}
|
||||
|
||||
// Returns the number of index buffer elements that make up one primitive of
// the given type (3 for a triangle).
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type) {
  switch (primitive_type) {
    case proto::Mesh3d::TRIANGLE: {
      constexpr std::size_t kIndicesPerTriangle = 3;
      return kIndicesPerTriangle;
    }
  }
}
|
||||
|
||||
// Tells whether vertices of `vertex_type` contain `vertex_component`.
//
// Returns false for a vertex type that is not handled below.
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
                        VertexComponent vertex_component) {
  // No `default:` label on purpose, so the compiler can still flag a vertex
  // type that is added later and not handled here.
  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT:
      return HasVertexComponentVertexPT(vertex_component);
  }

  // Reached only for an unhandled vertex type value; without this return the
  // function would flow off its end, which is undefined behavior.
  return false;
}
|
||||
|
||||
// Computes the offset (in buffer elements) of `vertex_component` inside a
// vertex of `vertex_type`.
//
// Returns an error status if the vertex type doesn't have the requested
// component, or if the vertex type is not handled.
absl::StatusOr<uint32_t> GetVertexComponentOffset(
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
  // The check goes through the type-aware predicate, so that it stays correct
  // if further vertex types are introduced.
  RET_CHECK(HasVertexComponent(vertex_type, vertex_component))
      << "A given vertex type doesn't have the requested component!";

  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT:
      return GetVertexComponentOffsetVertexPT(vertex_component);
  }

  // Reached only for an unhandled vertex type value; report it instead of
  // flowing off the end of the function.
  RET_CHECK_FAIL() << "Unsupported vertex type!";
}
|
||||
|
||||
// Computes the size (in buffer elements) of `vertex_component` inside a
// vertex of `vertex_type`.
//
// Returns an error status if the vertex type doesn't have the requested
// component, or if the vertex type is not handled.
absl::StatusOr<uint32_t> GetVertexComponentSize(
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
  // The check goes through the type-aware predicate, so that it stays correct
  // if further vertex types are introduced.
  RET_CHECK(HasVertexComponent(vertex_type, vertex_component))
      << "A given vertex type doesn't have the requested component!";

  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT:
      return GetVertexComponentSizeVertexPT(vertex_component);
  }

  // Reached only for an unhandled vertex type value; report it instead of
  // flowing off the end of the function.
  RET_CHECK_FAIL() << "Unsupported vertex type!";
}
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
51
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h
Normal file
51
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
|
||||
enum class VertexComponent { POSITION, TEX_COORD };
|
||||
|
||||
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type);
|
||||
|
||||
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type);
|
||||
|
||||
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
|
||||
VertexComponent vertex_component);
|
||||
|
||||
// Computes the vertex component offset.
|
||||
//
|
||||
// Returns an error status if a given vertex type doesn't have the requested
|
||||
// component.
|
||||
absl::StatusOr<uint32_t> GetVertexComponentOffset(
|
||||
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
|
||||
|
||||
// Computes the vertex component size.
|
||||
//
|
||||
// Returns an error status if a given vertex type doesn't have the requested
|
||||
// component.
|
||||
absl::StatusOr<uint32_t> GetVertexComponentSize(
|
||||
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
|
@ -0,0 +1,264 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
namespace {
|
||||
|
||||
class FloatPrecisionProcrustesSolver : public ProcrustesSolver {
|
||||
public:
|
||||
FloatPrecisionProcrustesSolver() = default;
|
||||
|
||||
// Validates the inputs, takes the square root of the point weights and hands
// everything over to the internal WEOP solver, which writes the estimated
// transformation into `transform_mat`. Any failure is propagated with extra
// context appended to the status message.
absl::Status SolveWeightedOrthogonalProblem(
    const Eigen::Matrix3Xf& source_points,  //
    const Eigen::Matrix3Xf& target_points,  //
    const Eigen::VectorXf& point_weights,
    Eigen::Matrix4f& transform_mat) const override {
  // Input validation: point clouds first, then the weights.
  MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points))
      << "Failed to validate weighted orthogonal problem input points!";
  MP_RETURN_IF_ERROR(ValidatePointWeights(source_points.cols(), point_weights))
      << "Failed to validate weighted orthogonal problem point weights!";

  // The internal solver works with the square roots of the weights.
  const Eigen::VectorXf weight_roots = ExtractSquareRoot(point_weights);

  MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem(
      source_points, target_points, weight_roots, transform_mat))
      << "Failed to solve the WEOP problem!";

  return absl::OkStatus();
}
|
||||
|
||||
private:
|
||||
static constexpr float kAbsoluteErrorEps = 1e-9f;
|
||||
|
||||
// Checks that there is at least one source point and that the source and the
// target point clouds have the same number of points (columns).
static absl::Status ValidateInputPoints(
    const Eigen::Matrix3Xf& source_points,
    const Eigen::Matrix3Xf& target_points) {
  // An empty source cloud is rejected.
  RET_CHECK_GT(source_points.cols(), 0)
      << "The number of source points must be positive!";

  // Points are matched column by column, so the counts have to agree.
  RET_CHECK_EQ(source_points.cols(), target_points.cols())
      << "The number of source and target points must be equal!";

  return absl::OkStatus();
}
|
||||
|
||||
// Checks that `point_weights` is non-empty, has exactly `num_points` entries,
// contains no negative entry, and sums up to more than `kAbsoluteErrorEps`.
static absl::Status ValidatePointWeights(
    int num_points, const Eigen::VectorXf& point_weights) {
  RET_CHECK_GT(point_weights.size(), 0)
      << "The number of point weights must be positive!";
  RET_CHECK_EQ(point_weights.size(), num_points)
      << "The number of points and point weights must be equal!";

  // Accumulate the total weight while checking each entry's sign.
  float total_weight = 0.f;
  int i = 0;
  while (i < num_points) {
    RET_CHECK_GE(point_weights(i), 0.f)
        << "Each point weight must be non-negative!";
    total_weight += point_weights(i);
    ++i;
  }

  RET_CHECK_GT(total_weight, kAbsoluteErrorEps)
      << "The total point weight is too small!";

  return absl::OkStatus();
}
|
||||
|
||||
// Returns a vector of the same size as `point_weights` in which every entry is
// the `std::sqrt` of the corresponding input entry.
static Eigen::VectorXf ExtractSquareRoot(
    const Eigen::VectorXf& point_weights) {
  // Applied entry by entry through a scalar functor, so each value is produced
  // by the same `std::sqrt(float)` call as in an explicit loop.
  return point_weights.unaryExpr(
      [](float weight) { return std::sqrt(weight); });
}
|
||||
|
||||
// Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into
|
||||
// a single 4x4 transformation matrix.
|
||||
static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s,
|
||||
const Eigen::Vector3f& t) {
|
||||
Eigen::Matrix4f result = Eigen::Matrix4f::Identity();
|
||||
result.leftCols(3).topRows(3) = r_and_s;
|
||||
result.col(3).topRows(3) = t;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// The weighted problem is thoroughly addressed in Section 2.4 of:
|
||||
// D. Akca, Generalized Procrustes analysis and its applications
|
||||
// in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648
|
||||
//
|
||||
// Notable differences in the code presented here are:
|
||||
//
|
||||
// * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q.
|
||||
// Our W_p is diagonal (equal to diag(sqrt_weights^2)),
|
||||
// so we can just set Q = diag(sqrt_weights) instead.
|
||||
//
|
||||
// * In the paper, the problem is presented as
|
||||
// (for W_k = I and W_p = tranposed(Q) Q):
|
||||
// || Q (c A T + j tranposed(t) - B) || -> min.
|
||||
//
|
||||
// We reformulate it as an equivalent minimization of the transpose's
|
||||
// norm:
|
||||
// || (c tranposed(T) tranposed(A) - tranposed(B)) tranposed(Q) || -> min,
|
||||
// where tranposed(A) and tranposed(B) are the source and the target point
|
||||
// clouds, respectively, c tranposed(T) is the rotation+scaling R sought
|
||||
// for, and Q is diag(sqrt_weights).
|
||||
//
|
||||
// Most of the derivations are therefore transposed.
|
||||
//
|
||||
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
|
||||
// return type in order to avoid Eigen memory alignment issues. Details:
|
||||
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
|
||||
static absl::Status InternalSolveWeightedOrthogonalProblem(
|
||||
const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets,
|
||||
const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) {
|
||||
// tranposed(A_w).
|
||||
Eigen::Matrix3Xf weighted_sources =
|
||||
sources.array().rowwise() * sqrt_weights.array().transpose();
|
||||
// tranposed(B_w).
|
||||
Eigen::Matrix3Xf weighted_targets =
|
||||
targets.array().rowwise() * sqrt_weights.array().transpose();
|
||||
|
||||
// w = tranposed(j_w) j_w.
|
||||
float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum();
|
||||
|
||||
// Let C = (j_w tranposed(j_w)) / (tranposed(j_w) j_w).
|
||||
// Note that C = tranposed(C), hence (I - C) = tranposed(I - C).
|
||||
//
|
||||
// tranposed(A_w) C = tranposed(A_w) j_w tranposed(j_w) / w =
|
||||
// (tranposed(A_w) j_w) tranposed(j_w) / w = c_w tranposed(j_w),
|
||||
//
|
||||
// where c_w = tranposed(A_w) j_w / w is a k x 1 vector calculated here:
|
||||
Eigen::Matrix3Xf twice_weighted_sources =
|
||||
weighted_sources.array().rowwise() * sqrt_weights.array().transpose();
|
||||
Eigen::Vector3f source_center_of_mass =
|
||||
twice_weighted_sources.rowwise().sum() / total_weight;
|
||||
// tranposed((I - C) A_w) = tranposed(A_w) (I - C) =
|
||||
// tranposed(A_w) - tranposed(A_w) C = tranposed(A_w) - c_w tranposed(j_w).
|
||||
Eigen::Matrix3Xf centered_weighted_sources =
|
||||
weighted_sources - source_center_of_mass * sqrt_weights.transpose();
|
||||
|
||||
Eigen::Matrix3f rotation;
|
||||
MP_RETURN_IF_ERROR(ComputeOptimalRotation(
|
||||
weighted_targets * centered_weighted_sources.transpose(), rotation))
|
||||
<< "Failed to compute the optimal rotation!";
|
||||
ASSIGN_OR_RETURN(
|
||||
float scale,
|
||||
ComputeOptimalScale(centered_weighted_sources, weighted_sources,
|
||||
weighted_targets, rotation),
|
||||
_ << "Failed to compute the optimal scale!");
|
||||
|
||||
// R = c tranposed(T).
|
||||
Eigen::Matrix3f rotation_and_scale = scale * rotation;
|
||||
|
||||
// Compute optimal translation for the weighted problem.
|
||||
|
||||
// tranposed(B_w - c A_w T) = tranposed(B_w) - R tranposed(A_w) in (54).
|
||||
const auto pointwise_diffs =
|
||||
weighted_targets - rotation_and_scale * weighted_sources;
|
||||
// Multiplication by j_w is a respectively weighted column sum.
|
||||
// (54) from the paper.
|
||||
const auto weighted_pointwise_diffs =
|
||||
pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose();
|
||||
Eigen::Vector3f translation =
|
||||
weighted_pointwise_diffs.rowwise().sum() / total_weight;
|
||||
|
||||
transform_mat = CombineTransformMatrix(rotation_and_scale, translation);
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// `design_matrix` is a transposed LHS of (51) in the paper.
|
||||
//
|
||||
// Note: the output `rotation` argument is used instead of `StatusOr<>`
|
||||
// return type in order to avoid Eigen memory alignment issues. Details:
|
||||
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
|
||||
static absl::Status ComputeOptimalRotation(
|
||||
const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) {
|
||||
RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps)
|
||||
<< "Design matrix norm is too small!";
|
||||
|
||||
Eigen::JacobiSVD<Eigen::Matrix3f> svd(
|
||||
design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV);
|
||||
|
||||
Eigen::Matrix3f postrotation = svd.matrixU();
|
||||
Eigen::Matrix3f prerotation = svd.matrixV().transpose();
|
||||
|
||||
// Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1),
|
||||
// see "4.6 Constrained orthogonal Procrustes problems"
|
||||
// in the Gower & Dijksterhuis's book "Procrustes Analysis".
|
||||
// We flip the sign of the least singular value along with a column in W.
|
||||
//
|
||||
// Note that now the sum of singular values doesn't work for scale
|
||||
// estimation due to this sign flip.
|
||||
if (postrotation.determinant() * prerotation.determinant() <
|
||||
static_cast<float>(0)) {
|
||||
postrotation.col(2) *= static_cast<float>(-1);
|
||||
}
|
||||
|
||||
// Transposed (52) from the paper.
|
||||
rotation = postrotation * prerotation;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
// Computes the optimal uniform scale, (53) from the paper, as a ratio of two
// sums of element-wise products. Fails if either the denominator or the
// resulting scale does not exceed `kAbsoluteErrorEps`.
static absl::StatusOr<float> ComputeOptimalScale(
    const Eigen::Matrix3Xf& centered_weighted_sources,
    const Eigen::Matrix3Xf& weighted_sources,
    const Eigen::Matrix3Xf& weighted_targets,
    const Eigen::Matrix3f& rotation) {
  // The identity trace(A B) = sum(A * B^T), with * being the Hadamard
  // product, is used to avoid building large intermediate matrices.
  //
  // `rotation * centered_weighted_sources` is
  // transposed(T) transposed(A_w) (I - C).
  float numerator = (rotation * centered_weighted_sources)
                        .cwiseProduct(weighted_targets)
                        .sum();
  float denominator =
      centered_weighted_sources.cwiseProduct(weighted_sources).sum();

  RET_CHECK_GT(denominator, kAbsoluteErrorEps)
      << "Scale expression denominator is too small!";
  RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps)
      << "Scale is too small!";

  const float scale = numerator / denominator;
  return scale;
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver() {
|
||||
return absl::make_unique<FloatPrecisionProcrustesSolver>();
|
||||
}
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "Eigen/Dense"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
|
||||
// Encapsulates a stateless solver for the Weighted Extended Orthogonal
|
||||
// Procrustes (WEOP) Problem, as defined in Section 2.4 of
|
||||
// https://doi.org/10.3929/ethz-a-004656648.
|
||||
//
|
||||
// Given the source and the target point clouds, the algorithm estimates
|
||||
// a 4x4 transformation matrix featuring the following semantic components:
|
||||
//
|
||||
// * Uniform scale
|
||||
// * Rotation
|
||||
// * Translation
|
||||
//
|
||||
// The matrix maps the source point cloud into the target point cloud minimizing
|
||||
// the Mean Squared Error.
|
||||
class ProcrustesSolver {
|
||||
public:
|
||||
virtual ~ProcrustesSolver() = default;
|
||||
|
||||
// Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem.
|
||||
//
|
||||
// All `source_points`, `target_points` and `point_weights` must define the
|
||||
// same number of points. Elements of `point_weights` must be non-negative.
|
||||
//
|
||||
// A too small diameter of either of the point clouds will likely lead to
|
||||
// numerical instabilities and failure to estimate the transformation.
|
||||
//
|
||||
// A too small point cloud total weight will likely lead to numerical
|
||||
// instabilities and failure to estimate the transformation too.
|
||||
//
|
||||
// Small point coordinate deviation for either of the point cloud will likely
|
||||
// result in a failure as it will make the solution very unstable if possible.
|
||||
//
|
||||
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
|
||||
// return type in order to avoid Eigen memory alignment issues. Details:
|
||||
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
|
||||
virtual absl::Status SolveWeightedOrthogonalProblem(
|
||||
const Eigen::Matrix3Xf& source_points, //
|
||||
const Eigen::Matrix3Xf& target_points, //
|
||||
const Eigen::VectorXf& point_weights, //
|
||||
Eigen::Matrix4f& transform_mat) const = 0;
|
||||
};
|
||||
|
||||
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver();
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
127
mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.cc
Normal file
127
mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.cc
Normal file
|
@ -0,0 +1,127 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
|
||||
// Validates the perspective camera parameters. With a margin of 10^{-9}:
//   * Near Z must be greater than 0;
//   * Far Z must be greater than Near Z;
//   * the vertical FOV (in degrees) must be positive and less than 180.
absl::Status ValidatePerspectiveCamera(
    const proto::PerspectiveCamera& perspective_camera) {
  static constexpr float kAbsoluteErrorEps = 1e-9f;

  // Clipping planes.
  RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps)
      << "Near Z must be greater than 0 with a margin of 10^{-9}!";
  RET_CHECK_GT(perspective_camera.far(),
               perspective_camera.near() + kAbsoluteErrorEps)
      << "Far Z must be greater than Near Z with a margin of 10^{-9}!";

  // Field of view.
  RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps)
      << "Vertical FOV must be positive with a margin of 10^{-9}!";
  RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps,
               180.f)
      << "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}";

  return absl::OkStatus();
}
|
||||
|
||||
// Validates the environment, which currently amounts to validating its
// perspective camera. A failure is propagated with extra context appended.
absl::Status ValidateEnvironment(const proto::Environment& environment) {
  MP_RETURN_IF_ERROR(
      ValidatePerspectiveCamera(environment.perspective_camera()))
      << "Invalid perspective camera!";
  return absl::OkStatus();
}
|
||||
|
||||
// Validates the structural consistency of a 3D mesh:
//   * the vertex buffer size must be a multiple of the vertex size;
//   * the index buffer size must be a multiple of the primitive size;
//   * every index must refer to an existing vertex.
// Returns an error status describing the first violated rule.
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d) {
  const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type());
  const std::size_t primitive_size =
      GetPrimitiveSize(mesh_3d.primitive_type());

  RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % vertex_size, 0)
      << "Vertex buffer size must be a multiple of the vertex size!";

  RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_size, 0)
      << "Index buffer size must be a multiple of the primitive size!";

  // Kept unsigned so that it compares cleanly with the unsigned indices.
  const std::size_t num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
  for (uint32_t idx : mesh_3d.index_buffer()) {
    RET_CHECK_LT(idx, num_vertices)
        << "All mesh indices must refer to an existing vertex!";
  }

  return absl::OkStatus();
}
|
||||
|
||||
// Validates a face geometry: its mesh must be a valid 3D mesh and its pose
// transformation matrix must be a 4x4 matrix, i.e. have 4 rows, 4 columns and
// 16 packed data elements.
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry) {
  MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";

  static constexpr char kInvalid4x4MatrixMessage[] =
      "Pose transformation matrix must be a 4x4 matrix!";

  const mediapipe::MatrixData& pose_transform_matrix =
      face_geometry.pose_transform_matrix();
  RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
  // Both dimensions are checked; the packed data size alone does not tell a
  // 4x4 matrix from e.g. a 2x8 one.
  RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
  RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
      << kInvalid4x4MatrixMessage;

  return absl::OkStatus();
}
|
||||
|
||||
// Validates the geometry pipeline metadata:
//   * the canonical mesh must be a valid 3D mesh;
//   * the Procrustes landmark basis must be non-empty;
//   * every basis entry must refer to an existing canonical mesh vertex and
//     carry a non-negative weight.
absl::Status ValidateGeometryPipelineMetadata(
    const proto::GeometryPipelineMetadata& metadata) {
  MP_RETURN_IF_ERROR(ValidateMesh3d(metadata.canonical_mesh()))
      << "Invalid canonical mesh!";

  RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
      << "Procrustes landmark basis must be non-empty!";

  // Number of vertices = vertex buffer length / elements per vertex.
  const proto::Mesh3d& mesh = metadata.canonical_mesh();
  const int num_vertices =
      mesh.vertex_buffer_size() / GetVertexSize(mesh.vertex_type());

  const auto& basis = metadata.procrustes_landmark_basis();
  for (const proto::WeightedLandmarkRef& wlr : basis) {
    RET_CHECK_LT(wlr.landmark_id(), num_vertices)
        << "All Procrustes basis indices must refer to an existing canonical "
           "mesh vertex!";
    RET_CHECK_GE(wlr.weight(), 0.f)
        << "All Procrustes basis landmarks must have a non-negative weight!";
  }

  return absl::OkStatus();
}
|
||||
|
||||
// Validates frame dimensions: both `frame_width` and `frame_height` must be
// strictly greater than zero. The width is checked before the height, so a
// frame with two bad dimensions reports the width failure.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
  RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
  RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";
  return absl::OkStatus();
}
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
||||
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::tasks::vision::face_geometry {
|
||||
|
||||
// Validates `perspective_camera`.
|
||||
//
|
||||
// Near Z must be greater than 0 with a margin of `1e-9`.
|
||||
// Far Z must be greater than Near Z with a margin of `1e-9`.
|
||||
// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range
|
||||
// edges.
|
||||
absl::Status ValidatePerspectiveCamera(
|
||||
const proto::PerspectiveCamera& perspective_camera);
|
||||
|
||||
// Validates `environment`.
|
||||
//
|
||||
// Environment's perspective camera must be valid.
|
||||
absl::Status ValidateEnvironment(const proto::Environment& environment);
|
||||
|
||||
// Validates `mesh_3d`.
|
||||
//
|
||||
// Mesh vertex buffer size must be a multiple of the vertex size.
|
||||
// Mesh index buffer size must be a multiple of the primitive size.
|
||||
// All mesh indices must reference an existing mesh vertex.
|
||||
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d);
|
||||
|
||||
// Validates `face_geometry`.
|
||||
//
|
||||
// Face mesh must be valid.
|
||||
// Face pose transformation matrix must be a 4x4 matrix.
|
||||
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry);
|
||||
|
||||
// Validates `metadata`.
|
||||
//
|
||||
// Canonical face mesh must be valid.
|
||||
// Procrustes landmark basis must be non-empty.
|
||||
// All Procrustes basis indices must reference an existing canonical mesh
|
||||
// vertex.
|
||||
// All Procrustes basis landmarks must have a non-negative weight.
|
||||
absl::Status ValidateGeometryPipelineMetadata(
|
||||
const proto::GeometryPipelineMetadata& metadata);
|
||||
|
||||
// Validates frame dimensions.
|
||||
//
|
||||
// Both frame width and frame height must be positive.
|
||||
absl::Status ValidateFrameDimensions(int frame_width, int frame_height);
|
||||
|
||||
} // namespace mediapipe::tasks::vision::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
46
mediapipe/tasks/cc/vision/face_geometry/proto/BUILD
Normal file
46
mediapipe/tasks/cc/vision/face_geometry/proto/BUILD
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Copyright 2023 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "environment_proto",
|
||||
srcs = ["environment.proto"],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "face_geometry_proto",
|
||||
srcs = ["face_geometry.proto"],
|
||||
deps = [
|
||||
":mesh_3d_proto",
|
||||
"//mediapipe/framework/formats:matrix_data_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "geometry_pipeline_metadata_proto",
|
||||
srcs = ["geometry_pipeline_metadata.proto"],
|
||||
deps = [
|
||||
":mesh_3d_proto",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "mesh_3d_proto",
|
||||
srcs = ["mesh_3d.proto"],
|
||||
)
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.face_geometry.proto;
|
||||
|
||||
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||
option java_outer_classname = "EnvironmentProto";
|
||||
|
||||
// Defines the (0, 0) origin point location of the environment.
|
||||
//
|
||||
// The variation in the origin point location can be traced back to the memory
|
||||
// layout of the camera video frame buffers.
|
||||
//
|
||||
// Usually, the memory layout for most CPU (and also some GPU) camera video
|
||||
// frame buffers results in having the (0, 0) origin point located in the
|
||||
// Top Left corner.
|
||||
//
|
||||
// On the contrary, the memory layout for most GPU camera video frame buffers
|
||||
// results in having the (0, 0) origin point located in the Bottom Left corner.
|
||||
//
|
||||
// Let's consider the following example:
|
||||
//
|
||||
// (A) ---------------+
|
||||
// ___ |
|
||||
// | (1) | | |
|
||||
// | / \ | | |
|
||||
// | |---|===|-| |
|
||||
// | |---| | | |
|
||||
// | / \ | | |
|
||||
// | | | | | |
|
||||
// | | (2) |=| | |
|
||||
// | | | | | |
|
||||
// | |_______| |_| |
|
||||
// | |@| |@| | | |
|
||||
// | ___________|_|_ |
|
||||
// |
|
||||
// (B) ---------------+
|
||||
//
|
||||
// In this example, (1) and (2) have the same X coordinate regardless of the
|
||||
// origin point location. However, having the origin point located at (A)
|
||||
// (Top Left corner) results in (1) having a smaller Y coordinate if compared to
|
||||
// (2). Similarly, having the origin point located at (B) (Bottom Left corner)
|
||||
// results in (1) having a greater Y coordinate if compared to (2).
|
||||
//
|
||||
// Providing the correct origin point location for your environment and making
|
||||
// sure all the input landmarks are in-sync with this location is crucial
|
||||
// for receiving the correct output face geometry and visual renders.
|
||||
enum OriginPointLocation {
|
||||
BOTTOM_LEFT_CORNER = 1;
|
||||
TOP_LEFT_CORNER = 2;
|
||||
}
|
||||
|
||||
// The perspective camera is defined through its vertical FOV angle and the
|
||||
// Z-clipping planes. The aspect ratio is a runtime variable for the face
|
||||
// geometry module and should be provided alongside the face landmarks in order
|
||||
// to estimate the face geometry on a given frame.
|
||||
//
|
||||
// More info on Perspective Cameras:
|
||||
// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective
|
||||
message PerspectiveCamera {
|
||||
// `0 < vertical_fov_degrees < 180`.
|
||||
optional float vertical_fov_degrees = 1;
|
||||
// `0 < near < far`.
|
||||
optional float near = 2;
|
||||
optional float far = 3;
|
||||
}
|
||||
|
||||
message Environment {
|
||||
optional OriginPointLocation origin_point_location = 1;
|
||||
optional PerspectiveCamera perspective_camera = 2;
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.face_geometry.proto;
|
||||
|
||||
import "mediapipe/framework/formats/matrix_data.proto";
|
||||
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";
|
||||
|
||||
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||
option java_outer_classname = "FaceGeometryProto";
|
||||
|
||||
// Defines the face geometry pipeline estimation result format.
|
||||
message FaceGeometry {
|
||||
// Defines a mesh surface for a face. The face mesh vertex IDs are the same as
|
||||
// the face landmark IDs.
|
||||
//
|
||||
// XYZ coordinates exist in the right-handed Metric 3D space configured by an
|
||||
// environment. UV coordinates are taken from the canonical face mesh model.
|
||||
//
|
||||
// XY coordinates are guaranteed to match the screen positions of
|
||||
// the input face landmarks after (1) being multiplied by the face pose
|
||||
// transformation matrix and then (2) being projected with a perspective
|
||||
// camera matrix of the same environment.
|
||||
//
|
||||
// NOTE: the triangular topology of the face mesh is only useful when derived
|
||||
// from the 468 face landmarks, not from the 6 face detection landmarks
|
||||
// (keypoints). The latter don't cover the entire face and this mesh is
|
||||
// defined here only to comply with the API. It should be considered as
|
||||
// a placeholder and/or for debugging purposes.
|
||||
//
|
||||
// Use the face geometry derived from the face detection landmarks
|
||||
// (keypoints) for the face pose transformation matrix, not the mesh.
|
||||
optional Mesh3d mesh = 1;
|
||||
|
||||
// Defines a face pose transformation matrix, which provides mapping from
|
||||
// the static canonical face model to the runtime face. Tries to distinguish
|
||||
// a head pose change from a facial expression change and to only reflect the
|
||||
// former.
|
||||
//
|
||||
// Is a 4x4 matrix and contains only the following components:
|
||||
// * Uniform scale
|
||||
// * Rotation
|
||||
// * Translation
|
||||
//
|
||||
// The last row is guaranteed to be `[0 0 0 1]`.
|
||||
optional mediapipe.MatrixData pose_transform_matrix = 2;
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.face_geometry.proto;
|
||||
|
||||
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";
|
||||
|
||||
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||
option java_outer_classname = "GeometryPipelineMetadataProto";
|
||||
|
||||
enum InputSource {
|
||||
DEFAULT = 0; // FACE_LANDMARK_PIPELINE
|
||||
FACE_LANDMARK_PIPELINE = 1;
|
||||
FACE_DETECTION_PIPELINE = 2;
|
||||
}
|
||||
|
||||
message WeightedLandmarkRef {
|
||||
// Defines the landmark ID. References an existing face landmark ID.
|
||||
optional uint32 landmark_id = 1;
|
||||
// Defines the landmark weight. The larger the weight the more influence this
|
||||
// landmark has in the basis.
|
||||
//
|
||||
// Is non-negative (validation accepts a weight of zero).
|
||||
optional float weight = 2;
|
||||
}
|
||||
|
||||
// Next field ID: 4
|
||||
message GeometryPipelineMetadata {
|
||||
// Defines the source of the input landmarks to let the underlying geometry
|
||||
// pipeline adjust in order to produce the best results.
|
||||
//
|
||||
// Face landmark pipeline is expected to produce 3D landmarks with relative Z
|
||||
// coordinate, which is scaled as the X coordinate assuming the weak
|
||||
// perspective projection camera model.
|
||||
//
|
||||
// Face detection pipeline is expected to produce 2D landmarks with Z
|
||||
// coordinate being equal to 0.
|
||||
optional InputSource input_source = 3;
|
||||
// Defines a mesh surface for a canonical face. The canonical face mesh vertex
|
||||
// IDs are the same as the face landmark IDs.
|
||||
//
|
||||
// XYZ coordinates are defined in centimeter units.
|
||||
optional Mesh3d canonical_mesh = 1;
|
||||
// Defines a weighted landmark basis for running the Procrustes solver
|
||||
// algorithm inside the geometry pipeline.
|
||||
//
|
||||
// A good basis sets face landmark weights in a way to distinguish a head pose
|
||||
// change from a facial expression change and to only respond to the former.
|
||||
repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
|
||||
}
|
41
mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto
Normal file
41
mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto
Normal file
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks.vision.face_geometry.proto;
|
||||
|
||||
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||
option java_outer_classname = "Mesh3dProto";
|
||||
|
||||
message Mesh3d {
|
||||
enum VertexType {
|
||||
// Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV).
|
||||
VERTEX_PT = 0;
|
||||
}
|
||||
|
||||
enum PrimitiveType {
|
||||
// Is defined by 3 indices: triangle vertex IDs.
|
||||
TRIANGLE = 0;
|
||||
}
|
||||
|
||||
optional VertexType vertex_type = 1;
|
||||
optional PrimitiveType primitive_type = 2;
|
||||
// Vertex buffer size is a multiple of the vertex size (e.g., 5 for
|
||||
// VERTEX_PT).
|
||||
repeated float vertex_buffer = 3;
|
||||
// Index buffer size is a multiple of the primitive size (e.g., 3 for
|
||||
// TRIANGLE).
|
||||
repeated uint32 index_buffer = 4;
|
||||
}
|
108
mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD
Normal file
108
mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD
Normal file
|
@ -0,0 +1,108 @@
|
|||
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "tensors_to_image_calculator_proto",
|
||||
srcs = ["tensors_to_image_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
"//mediapipe/gpu:gpu_origin_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tensors_to_image_calculator",
|
||||
srcs = ["tensors_to_image_calculator.cc"],
|
||||
copts = select({
|
||||
"//mediapipe:apple": [
|
||||
"-x objective-c++",
|
||||
"-fobjc-arc", # enable reference-counting
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
features = ["-layering_check"], # allow depending on tensor_to_image_calculator_gpu_deps
|
||||
linkopts = select({
|
||||
"//mediapipe:apple": [
|
||||
"-framework CoreVideo",
|
||||
"-framework MetalKit",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
deps = [
|
||||
":tensors_to_image_calculator_cc_proto",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/strings",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/api2:builder",
|
||||
"//mediapipe/framework/api2:node",
|
||||
"//mediapipe/framework/api2:packet",
|
||||
"//mediapipe/framework/api2:port",
|
||||
"//mediapipe/framework/formats:image",
|
||||
"//mediapipe/framework/formats:tensor",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:vector",
|
||||
"//mediapipe/gpu:gpu_origin_cc_proto",
|
||||
] + select({
|
||||
"//mediapipe/gpu:disable_gpu": [],
|
||||
"//conditions:default": ["tensor_to_image_calculator_gpu_deps"],
|
||||
}),
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "tensor_to_image_calculator_gpu_deps",
|
||||
visibility = ["//visibility:private"],
|
||||
deps = select({
|
||||
"//mediapipe:android": [
|
||||
"//mediapipe/gpu:gl_calculator_helper",
|
||||
"//mediapipe/gpu:gl_quad_renderer",
|
||||
"//mediapipe/gpu:gl_simple_shaders",
|
||||
"//mediapipe/gpu:gpu_buffer",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
|
||||
],
|
||||
"//mediapipe:ios": [
|
||||
"//mediapipe/gpu:MPPMetalHelper",
|
||||
"//mediapipe/gpu:MPPMetalUtil",
|
||||
"//mediapipe/gpu:gl_calculator_helper",
|
||||
"//mediapipe/gpu:gpu_buffer",
|
||||
],
|
||||
"//mediapipe:macos": [],
|
||||
"//conditions:default": [
|
||||
"//mediapipe/gpu:gl_calculator_helper",
|
||||
"//mediapipe/gpu:gl_quad_renderer",
|
||||
"//mediapipe/gpu:gpu_buffer",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
|
||||
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
|
||||
],
|
||||
}),
|
||||
)
|
|
@ -0,0 +1,439 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/status/status.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "mediapipe/framework/api2/node.h"
|
||||
#include "mediapipe/framework/api2/packet.h"
|
||||
#include "mediapipe/framework/api2/port.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/gpu/gpu_origin.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"
|
||||
|
||||
#if !MEDIAPIPE_DISABLE_GPU
|
||||
#include "mediapipe/gpu/gpu_buffer.h"
|
||||
#if MEDIAPIPE_METAL_ENABLED
|
||||
#import <CoreVideo/CoreVideo.h>
|
||||
#import <Metal/Metal.h>
|
||||
#import <MetalKit/MetalKit.h>
|
||||
|
||||
#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
|
||||
#import "mediapipe/gpu/MPPMetalHelper.h"
|
||||
#else
|
||||
#include "mediapipe/gpu/gl_calculator_helper.h"
|
||||
#include "mediapipe/gpu/gl_quad_renderer.h"
|
||||
#include "mediapipe/gpu/gl_simple_shaders.h"
|
||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/gl_shader.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
|
||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
#endif // MEDIAPIPE_METAL_ENABLED
|
||||
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
namespace {
|
||||
|
||||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Node;
|
||||
using ::mediapipe::api2::Output;
|
||||
|
||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
using ::tflite::gpu::gl::GlProgram;
|
||||
using ::tflite::gpu::gl::GlShader;
|
||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
|
||||
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
|
||||
|
||||
// Commonly used to compute the number of blocks to launch in a kernel.
|
||||
// Returns how many groups of `group_size` elements are needed to cover `size`
// elements, i.e. `size / group_size` rounded up (for non-negative `size` and
// positive `group_size`). Used to size a compute dispatch.
static int NumGroups(const int size, const int group_size) {  // NOLINT
  const int padded_size = size + group_size - 1;
  return padded_size / group_size;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Converts a MediaPipe tensor to a MediaPipe Image.
|
||||
//
|
||||
// Input streams:
|
||||
// TENSORS - std::vector<mediapipe::Tensor> that only contains one element.
|
||||
//
|
||||
// Output streams:
|
||||
// IMAGE - mediapipe::Image.
|
||||
//
|
||||
// TODO: Enable TensorsToImageCalculator to run on CPU.
|
||||
class TensorsToImageCalculator : public Node {
|
||||
public:
|
||||
static constexpr Input<std::vector<Tensor>> kInputTensors{"TENSORS"};
|
||||
static constexpr Output<Image> kOutputImage{"IMAGE"};
|
||||
|
||||
MEDIAPIPE_NODE_CONTRACT(kInputTensors, kOutputImage);
|
||||
|
||||
static absl::Status UpdateContract(CalculatorContract* cc);
|
||||
absl::Status Open(CalculatorContext* cc);
|
||||
absl::Status Process(CalculatorContext* cc);
|
||||
absl::Status Close(CalculatorContext* cc);
|
||||
|
||||
private:
|
||||
#if !MEDIAPIPE_DISABLE_GPU
|
||||
#if MEDIAPIPE_METAL_ENABLED
|
||||
bool metal_initialized_ = false;
|
||||
MPPMetalHelper* gpu_helper_ = nullptr;
|
||||
id<MTLComputePipelineState> to_buffer_program_;
|
||||
|
||||
absl::Status MetalSetup(CalculatorContext* cc);
|
||||
absl::Status MetalProcess(CalculatorContext* cc);
|
||||
#else
|
||||
absl::Status GlSetup(CalculatorContext* cc);
|
||||
|
||||
GlCalculatorHelper gl_helper_;
|
||||
|
||||
bool gl_initialized_ = false;
|
||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
std::unique_ptr<tflite::gpu::gl::GlProgram> gl_compute_program_;
|
||||
const tflite::gpu::uint3 workgroup_size_ = {8, 8, 1};
|
||||
#else
|
||||
GLuint program_ = 0;
|
||||
std::unique_ptr<mediapipe::QuadRenderer> gl_renderer_;
|
||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
#endif // MEDIAPIPE_METAL_ENABLED
|
||||
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||
};
|
||||
MEDIAPIPE_REGISTER_NODE(::mediapipe::tasks::TensorsToImageCalculator);
|
||||
|
||||
// Lets the active GPU backend register its requirements on the contract:
// MPPMetalHelper when Metal is enabled, GlCalculatorHelper otherwise. With
// GPU support disabled the contract is left untouched.
absl::Status TensorsToImageCalculator::UpdateContract(CalculatorContract* cc) {
#if MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
#elif MEDIAPIPE_METAL_ENABLED
  MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
  return absl::OkStatus();
#else
  return GlCalculatorHelper::UpdateContract(cc);
#endif  // MEDIAPIPE_DISABLE_GPU / MEDIAPIPE_METAL_ENABLED
}
|
||||
|
||||
// Prepares the GPU helper for the active backend: allocates the Metal helper
// (failing if allocation returns nil) or opens the GL helper. Does nothing
// when GPU support is disabled.
absl::Status TensorsToImageCalculator::Open(CalculatorContext* cc) {
#if MEDIAPIPE_DISABLE_GPU
  // No GPU backend compiled in; nothing to open.
#elif MEDIAPIPE_METAL_ENABLED
  gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
  RET_CHECK(gpu_helper_);
#else
  MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
#endif  // MEDIAPIPE_DISABLE_GPU / MEDIAPIPE_METAL_ENABLED

  return absl::OkStatus();
}
|
||||
|
||||
// Converts the single input tensor into an Image and sends it on the IMAGE
// output.
//
// * Metal builds delegate to MetalProcess().
// * GL builds run inside the GL context, lazily calling GlSetup() on first
//   use, and then either dispatch the compute program (OpenGL ES >= 3.1) or
//   render the tensor texture through the quad renderer.
// * With GPU support disabled this is a no-op returning OK.
//
// Returns OK without emitting anything when the input packet is empty. Fails
// when the vector does not hold exactly one tensor or the tensor has fewer
// than 3 dimensions (dims[1] is read as height, dims[2] as width).
absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED

  return MetalProcess(cc);

#else

  return gl_helper_.RunInGlContext([this, cc]() -> absl::Status {
    if (!gl_initialized_) {
      MP_RETURN_IF_ERROR(GlSetup(cc));
      gl_initialized_ = true;
    }

    if (kInputTensors(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    const auto& input_tensors = kInputTensors(cc).Get();
    RET_CHECK_EQ(input_tensors.size(), 1)
        << "Expect 1 input tensor, but have " << input_tensors.size();
    // Guard the dims[1] / dims[2] reads below against low-rank tensors.
    RET_CHECK_GE(input_tensors[0].shape().dims.size(), 3)
        << "Expect an input tensor with at least 3 dimensions, but have "
        << input_tensors[0].shape().dims.size();
    const int tensor_width = input_tensors[0].shape().dims[2];
    const int tensor_height = input_tensors[0].shape().dims[1];

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

    auto out_texture = std::make_unique<tflite::gpu::gl::GlTexture>();
    MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
        tflite::gpu::DataType::UINT8,  // GL_RGBA8
        {tensor_width, tensor_height}, out_texture.get()));

    // Output image is bound to image unit 0, the input buffer to SSBO binding
    // 2. NOTE(review): these presumably match the bindings declared by the
    // compute shader built in GlSetup() — confirm there.
    const int output_index = 0;
    glBindImageTexture(output_index, out_texture->id(), 0, GL_FALSE, 0,
                       GL_WRITE_ONLY, GL_RGBA8);

    auto read_view = input_tensors[0].GetOpenGlBufferReadView();
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, read_view.name());

    const tflite::gpu::uint3 workload = {tensor_width, tensor_height, 1};
    const tflite::gpu::uint3 workgroups =
        tflite::gpu::DivideRoundUp(workload, workgroup_size_);

    glUseProgram(gl_compute_program_->id());
    glUniform2i(glGetUniformLocation(gl_compute_program_->id(), "out_size"),
                tensor_width, tensor_height);

    MP_RETURN_IF_ERROR(gl_compute_program_->Dispatch(workgroups));

    // Read the texture handle *before* releasing ownership: the evaluation
    // order of function arguments is unspecified, so calling
    // `out_texture->...` and `out_texture.release()` within the same argument
    // list could dereference an already-released (null) pointer.
    const auto texture_target = out_texture->target();
    const auto texture_id = out_texture->id();
    auto texture_buffer = mediapipe::GlTextureBuffer::Wrap(
        texture_target, texture_id, tensor_width, tensor_height,
        mediapipe::GpuBufferFormat::kBGRA32,
        // The callback owns the GlTexture from here on and deletes it when
        // invoked.
        [ptr = out_texture.release()](
            std::shared_ptr<mediapipe::GlSyncPoint> sync_token) mutable {
          delete ptr;
        });

    mediapipe::GpuBuffer output(std::move(texture_buffer));
    kOutputImage(cc).Send(Image(output));

#else

    // NOTE(review): presumably forces the tensor data to be available so the
    // texture view below can be produced — confirm against Tensor's docs.
    if (!input_tensors[0].ready_as_opengl_texture_2d()) {
      (void)input_tensors[0].GetCpuReadView();
    }

    auto output_texture =
        gl_helper_.CreateDestinationTexture(tensor_width, tensor_height);
    gl_helper_.BindFramebuffer(output_texture);  // GL_TEXTURE0
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D,
                  input_tensors[0].GetOpenGlTexture2dReadView().name());

    MP_RETURN_IF_ERROR(gl_renderer_->GlRender(
        tensor_width, tensor_height, output_texture.width(),
        output_texture.height(), mediapipe::FrameScaleMode::kStretch,
        mediapipe::FrameRotation::kNone,
        /*flip_horizontal=*/false, /*flip_vertical=*/false,
        /*flip_texture=*/false));

    // Unbind the input texture from unit 1 again.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);

    auto output = output_texture.GetFrame<GpuBuffer>();
    kOutputImage(cc).Send(Image(*output));

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

    return absl::OkStatus();
  });

#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}
|
||||
|
||||
// Releases the GL program objects inside the GL context. Metal and GPU-less
// builds have nothing to release here. Always returns OK.
absl::Status TensorsToImageCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
  // NOTE(review): gl_renderer_ is not touched here — confirm whether it needs
  // an explicit teardown inside the GL context.
  gl_helper_.RunInGlContext([this] {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
    gl_compute_program_.reset();
#else
    if (program_ != 0) {
      glDeleteProgram(program_);
      program_ = 0;
    }
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
  });
#endif  // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
  return absl::OkStatus();
}
|
||||
|
||||
#if MEDIAPIPE_METAL_ENABLED
|
||||
|
||||
// Converts the single input tensor into a GPU-backed Image with the Metal
// compute pipeline created by MetalSetup(), then sends the image on the
// output stream. Returns OK without sending anything when the input packet
// is empty.
absl::Status TensorsToImageCalculator::MetalProcess(CalculatorContext* cc) {
  // The compute pipeline is built lazily on the first call.
  if (!metal_initialized_) {
    MP_RETURN_IF_ERROR(MetalSetup(cc));
    metal_initialized_ = true;
  }

  if (kInputTensors(cc).IsEmpty()) {
    return absl::OkStatus();
  }
  const auto& input_tensors = kInputTensors(cc).Get();
  RET_CHECK_EQ(input_tensors.size(), 1)
      << "Expect 1 input tensor, but have " << input_tensors.size();

  // dims[1] (height) and dims[2] (width) are read below, so reject shapes
  // that are too short instead of indexing out of bounds.
  const auto& dims = input_tensors[0].shape().dims;
  RET_CHECK_GE(dims.size(), 3)
      << "Expect a tensor with at least 3 dimensions, but have "
      << dims.size();
  const int tensor_width = dims[2];
  const int tensor_height = dims[1];

  id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
  command_buffer.label = @"TensorsToImageCalculatorConvert";
  id<MTLComputeCommandEncoder> compute_encoder =
      [command_buffer computeCommandEncoder];
  [compute_encoder setComputePipelineState:to_buffer_program_];

  // Kernel argument 0: the tensor contents as a Metal buffer.
  auto input_view =
      mediapipe::MtlBufferView::GetReadView(input_tensors[0], command_buffer);
  [compute_encoder setBuffer:input_view.buffer() offset:0 atIndex:0];

  // Kernel texture slot 1: the destination texture backing the output image.
  mediapipe::GpuBuffer output =
      [gpu_helper_ mediapipeGpuBufferWithWidth:tensor_width
                                        height:tensor_height];
  id<MTLTexture> dst_texture = [gpu_helper_ metalTextureWithGpuBuffer:output];
  [compute_encoder setTexture:dst_texture atIndex:1];

  // Cover the image with 8x8 threadgroups.
  constexpr int kThreadgroupEdge = 8;
  MTLSize threads_per_group =
      MTLSizeMake(kThreadgroupEdge, kThreadgroupEdge, 1);
  MTLSize threadgroups =
      MTLSizeMake(NumGroups(tensor_width, kThreadgroupEdge),
                  NumGroups(tensor_height, kThreadgroupEdge), 1);
  [compute_encoder dispatchThreadgroups:threadgroups
                  threadsPerThreadgroup:threads_per_group];
  [compute_encoder endEncoding];
  [command_buffer commit];

  kOutputImage(cc).Send(Image(output));
  return absl::OkStatus();
}
|
||||
|
||||
// Compiles the Metal compute kernel that copies an RGB float buffer into a
// texture, and stores the resulting pipeline state in to_buffer_program_.
// Returns an error status if the library, the kernel function, or the
// pipeline state cannot be created.
absl::Status TensorsToImageCalculator::MetalSetup(CalculatorContext* cc) {
  id<MTLDevice> device = gpu_helper_.mtlDevice;

  // The kernel reads three consecutive floats per pixel from in_buf and
  // writes them as RGB with alpha fixed to 1.0.
  const std::string shader_source =
      R"(
#include <metal_stdlib>

using namespace metal;

kernel void convertKernel(
device float* in_buf [[ buffer(0) ]],
texture2d<float, access::read_write> out_tex [[ texture(1) ]],
uint2 gid [[ thread_position_in_grid ]]) {
if (gid.x >= out_tex.get_width() || gid.y >= out_tex.get_height()) return;
uint linear_index = 3 * (gid.y * out_tex.get_width() + gid.x);
float4 out_value = float4(in_buf[linear_index], in_buf[linear_index + 1], in_buf[linear_index + 2], 1.0);
out_tex.write(out_value, gid);
}
)";

  // -UTF8String yields NULL when the NSError is nil, and streaming a NULL
  // C string is undefined behaviour, so fall back to a fixed text.
  auto describe = [](NSError* err) -> std::string {
    const char* text = [[err localizedDescription] UTF8String];
    return text ? text : "(no error description)";
  };

  NSString* library_source =
      [NSString stringWithUTF8String:shader_source.c_str()];
  NSError* error = nil;
  id<MTLLibrary> library =
      [device newLibraryWithSource:library_source options:nil error:&error];
  RET_CHECK(library != nil)
      << "Couldn't create shader library " << describe(error);

  id<MTLFunction> kernel_func = [library newFunctionWithName:@"convertKernel"];
  RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";

  to_buffer_program_ =
      [device newComputePipelineStateWithFunction:kernel_func error:&error];
  RET_CHECK(to_buffer_program_ != nil)
      << "Couldn't create pipeline state " << describe(error);

  return absl::OkStatus();
}
|
||||
|
||||
#endif // MEDIAPIPE_METAL_ENABLED
|
||||
|
||||
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
|
||||
// Builds the GL program used to turn the input tensor into an image.
// With OpenGL ES 3.1 or newer a compute shader is compiled into
// gl_compute_program_; otherwise a fragment shader is installed on a
// QuadRenderer stored in gl_renderer_. On non-Apple platforms the shader is
// compiled with FLIP_Y_COORD defined unless the configured gpu_origin is
// TOP_LEFT.
absl::Status TensorsToImageCalculator::GlSetup(CalculatorContext* cc) {
  // Empty, or a "#define FLIP_Y_COORD" line spliced in ahead of the shader
  // body.
  std::string flip_y_snippet;
#if !defined(__APPLE__)
  if (cc->Options<TensorsToImageCalculatorOptions>().gpu_origin() !=
      mediapipe::GpuOrigin::TOP_LEFT) {
    flip_y_snippet = R"(
#define FLIP_Y_COORD
)";
  }
#endif  // !defined(__APPLE__)

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  // Source layout: generated header, declarations, optional flip define,
  // then the kernel body.
  const std::string compute_source = absl::StrCat(
      tflite::gpu::gl::GetShaderHeader(workgroup_size_), R"(
precision highp float;
layout(rgba8, binding = 0) writeonly uniform highp image2D output_texture;
uniform ivec2 out_size;
)",
      flip_y_snippet, R"(
layout(std430, binding = 2) readonly buffer B0 {
float elements[];
} input_data; // data tensor

void main() {
int out_width = out_size.x;
int out_height = out_size.y;

ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
if (gid.x >= out_width || gid.y >= out_height) { return; }
int linear_index = 3 * (gid.y * out_width + gid.x);

#ifdef FLIP_Y_COORD
int y_coord = out_height - gid.y - 1;
#else
int y_coord = gid.y;
#endif // defined(FLIP_Y_COORD)

ivec2 out_coordinate = ivec2(gid.x, y_coord);
vec4 out_value = vec4(input_data.elements[linear_index], input_data.elements[linear_index + 1], input_data.elements[linear_index + 2], 1.0);
imageStore(output_texture, out_coordinate, out_value);
})");

  GlShader compute_shader;
  MP_RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER, compute_source,
                                             &compute_shader));
  gl_compute_program_ = std::make_unique<GlProgram>();
  MP_RETURN_IF_ERROR(GlProgram::CreateWithShader(compute_shader,
                                                 gl_compute_program_.get()));

#else

  // Declares the fragment output, named differently on GL ES and desktop GL.
  constexpr GLchar kFragColorOutput[] = R"(
#ifdef GL_ES
#define fragColor gl_FragColor
#else
out vec4 fragColor;
#endif // defined(GL_ES);
)";

  // Samples the tensor texture and writes its RGB with alpha set to 1.0.
  constexpr GLchar kFragmentMain[] = R"(
DEFAULT_PRECISION(mediump, float)
in vec2 sample_coordinate;
uniform sampler2D tensor;
void main() {
#ifdef FLIP_Y_COORD
float y_coord = 1.0 - sample_coordinate.y;
#else
float y_coord = sample_coordinate.y;
#endif // defined(FLIP_Y_COORD)
vec3 color = texture2D(tensor, vec2(sample_coordinate.x, y_coord)).rgb;
fragColor = vec4(color, 1.0);
}
)";

  const std::string fragment_source =
      absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
                   kFragColorOutput, flip_y_snippet, kFragmentMain);
  gl_renderer_ = std::make_unique<mediapipe::QuadRenderer>();
  MP_RETURN_IF_ERROR(
      gl_renderer_->GlSetup(fragment_source.c_str(), {"tensor"}));

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  return mediapipe::OkStatus();
}
|
||||
|
||||
#endif // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
|
||||
|
||||
} // namespace tasks
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.tasks;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
import "mediapipe/gpu/gpu_origin.proto";
|
||||
|
||||
// Options for TensorsToImageCalculator, registered as an extension of
// mediapipe.CalculatorOptions.
message TensorsToImageCalculatorOptions {
|
||||
extend mediapipe.CalculatorOptions {
|
||||
optional TensorsToImageCalculatorOptions ext = 511831156;
|
||||
}
|
||||
|
||||
// For CONVENTIONAL mode for OpenGL, the image starts at the bottom and needs
|
||||
// to be flipped vertically as tensors are expected to start at the top.
|
||||
// (DEFAULT or unset is interpreted as CONVENTIONAL.)
|
||||
optional mediapipe.GpuOrigin.Mode gpu_origin = 1;
|
||||
}
|
|
@ -203,106 +203,111 @@ INSTANTIATE_TEST_CASE_P(
|
|||
CombinedPredictionCalculatorTests, CombinedPredictionCalculatorTest,
|
||||
testing::ValuesIn<CombinedPredictionCalculatorTestCase>({
|
||||
{
|
||||
.test_name = "TestCustomDramaWinnnerWith_HighCanned_Thresh",
|
||||
.custom_negative_score = 0.1,
|
||||
.drama_score = 0.5,
|
||||
.llama_score = 0.3,
|
||||
.drama_thresh = 0.25,
|
||||
.llama_thresh = 0.7,
|
||||
.canned_negative_score = 0.1,
|
||||
.bazinga_score = 0.3,
|
||||
.joy_score = 0.3,
|
||||
.peace_score = 0.3,
|
||||
.bazinga_thresh = 0.7,
|
||||
.joy_thresh = 0.7,
|
||||
.peace_thresh = 0.7,
|
||||
.max_scoring_label = "CustomDrama",
|
||||
.max_score = 0.5,
|
||||
/* test_name= */ "TestCustomDramaWinnnerWith_HighCanned_Thresh",
|
||||
/* custom_negative_score= */ 0.1,
|
||||
/* drama_score= */ 0.5,
|
||||
/* llama_score= */ 0.3,
|
||||
/* drama_thresh= */ 0.25,
|
||||
/* llama_thresh= */ 0.7,
|
||||
/* canned_negative_score= */ 0.1,
|
||||
/* bazinga_score= */ 0.3,
|
||||
/* joy_score= */ 0.3,
|
||||
/* peace_score= */ 0.3,
|
||||
/* bazinga_thresh= */ 0.7,
|
||||
/* joy_thresh= */ 0.7,
|
||||
/* peace_thresh= */ 0.7,
|
||||
/* max_scoring_label= */ "CustomDrama",
|
||||
/* max_score= */ 0.5,
|
||||
},
|
||||
{
|
||||
.test_name = "TestCannedWinnerWith_HighCustom_ZeroCanned_Thresh",
|
||||
.custom_negative_score = 0.1,
|
||||
.drama_score = 0.3,
|
||||
.llama_score = 0.6,
|
||||
.drama_thresh = 0.4,
|
||||
.llama_thresh = 0.8,
|
||||
.canned_negative_score = 0.1,
|
||||
.bazinga_score = 0.4,
|
||||
.joy_score = 0.3,
|
||||
.peace_score = 0.2,
|
||||
.bazinga_thresh = 0.0,
|
||||
.joy_thresh = 0.0,
|
||||
.peace_thresh = 0.0,
|
||||
.max_scoring_label = "CannedBazinga",
|
||||
.max_score = 0.4,
|
||||
/* test_name= */ "TestCannedWinnerWith_HighCustom_ZeroCanned_"
|
||||
"Thresh",
|
||||
/* custom_negative_score= */ 0.1,
|
||||
/* drama_score= */ 0.3,
|
||||
/* llama_score= */ 0.6,
|
||||
/* drama_thresh= */ 0.4,
|
||||
/* llama_thresh= */ 0.8,
|
||||
/* canned_negative_score= */ 0.1,
|
||||
/* bazinga_score= */ 0.4,
|
||||
/* joy_score= */ 0.3,
|
||||
/* peace_score= */ 0.2,
|
||||
/* bazinga_thresh= */ 0.0,
|
||||
/* joy_thresh= */ 0.0,
|
||||
/* peace_thresh= */ 0.0,
|
||||
/* max_scoring_label= */ "CannedBazinga",
|
||||
/* max_score= */ 0.4,
|
||||
},
|
||||
{
|
||||
.test_name = "TestNegativeWinnerWith_LowCustom_HighCanned_Thresh",
|
||||
.custom_negative_score = 0.5,
|
||||
.drama_score = 0.1,
|
||||
.llama_score = 0.4,
|
||||
.drama_thresh = 0.1,
|
||||
.llama_thresh = 0.05,
|
||||
.canned_negative_score = 0.1,
|
||||
.bazinga_score = 0.3,
|
||||
.joy_score = 0.3,
|
||||
.peace_score = 0.3,
|
||||
.bazinga_thresh = 0.7,
|
||||
.joy_thresh = 0.7,
|
||||
.peace_thresh = 0.7,
|
||||
.max_scoring_label = "Negative",
|
||||
.max_score = 0.5,
|
||||
/* test_name= */ "TestNegativeWinnerWith_LowCustom_HighCanned_"
|
||||
"Thresh",
|
||||
/* custom_negative_score= */ 0.5,
|
||||
/* drama_score= */ 0.1,
|
||||
/* llama_score= */ 0.4,
|
||||
/* drama_thresh= */ 0.1,
|
||||
/* llama_thresh= */ 0.05,
|
||||
/* canned_negative_score= */ 0.1,
|
||||
/* bazinga_score= */ 0.3,
|
||||
/* joy_score= */ 0.3,
|
||||
/* peace_score= */ 0.3,
|
||||
/* bazinga_thresh= */ 0.7,
|
||||
/* joy_thresh= */ 0.7,
|
||||
/* peace_thresh= */ 0.7,
|
||||
/* max_scoring_label= */ "Negative",
|
||||
/* max_score= */ 0.5,
|
||||
},
|
||||
{
|
||||
.test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh",
|
||||
.custom_negative_score = 0.8,
|
||||
.drama_score = 0.1,
|
||||
.llama_score = 0.1,
|
||||
.drama_thresh = 0.25,
|
||||
.llama_thresh = 0.7,
|
||||
.canned_negative_score = 0.1,
|
||||
.bazinga_score = 0.3,
|
||||
.joy_score = 0.3,
|
||||
.peace_score = 0.3,
|
||||
.bazinga_thresh = 0.7,
|
||||
.joy_thresh = 0.7,
|
||||
.peace_thresh = 0.7,
|
||||
.max_scoring_label = "Negative",
|
||||
.max_score = 0.8,
|
||||
/* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
|
||||
"Thresh",
|
||||
/* custom_negative_score= */ 0.8,
|
||||
/* drama_score= */ 0.1,
|
||||
/* llama_score= */ 0.1,
|
||||
/* drama_thresh= */ 0.25,
|
||||
/* llama_thresh= */ 0.7,
|
||||
/* canned_negative_score= */ 0.1,
|
||||
/* bazinga_score= */ 0.3,
|
||||
/* joy_score= */ 0.3,
|
||||
/* peace_score= */ 0.3,
|
||||
/* bazinga_thresh= */ 0.7,
|
||||
/* joy_thresh= */ 0.7,
|
||||
/* peace_thresh= */ 0.7,
|
||||
/* max_scoring_label= */ "Negative",
|
||||
/* max_score= */ 0.8,
|
||||
},
|
||||
{
|
||||
.test_name = "TestNegativeWinnerWith_HighCustom_HighCannedThresh2",
|
||||
.custom_negative_score = 0.1,
|
||||
.drama_score = 0.2,
|
||||
.llama_score = 0.7,
|
||||
.drama_thresh = 1.1,
|
||||
.llama_thresh = 1.1,
|
||||
.canned_negative_score = 0.1,
|
||||
.bazinga_score = 0.3,
|
||||
.joy_score = 0.3,
|
||||
.peace_score = 0.3,
|
||||
.bazinga_thresh = 0.7,
|
||||
.joy_thresh = 0.7,
|
||||
.peace_thresh = 0.7,
|
||||
.max_scoring_label = "Negative",
|
||||
.max_score = 0.1,
|
||||
/* test_name= */ "TestNegativeWinnerWith_HighCustom_"
|
||||
"HighCannedThresh2",
|
||||
/* custom_negative_score= */ 0.1,
|
||||
/* drama_score= */ 0.2,
|
||||
/* llama_score= */ 0.7,
|
||||
/* drama_thresh= */ 1.1,
|
||||
/* llama_thresh= */ 1.1,
|
||||
/* canned_negative_score= */ 0.1,
|
||||
/* bazinga_score= */ 0.3,
|
||||
/* joy_score= */ 0.3,
|
||||
/* peace_score= */ 0.3,
|
||||
/* bazinga_thresh= */ 0.7,
|
||||
/* joy_thresh= */ 0.7,
|
||||
/* peace_thresh= */ 0.7,
|
||||
/* max_scoring_label= */ "Negative",
|
||||
/* max_score= */ 0.1,
|
||||
},
|
||||
{
|
||||
.test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh3",
|
||||
.custom_negative_score = 0.1,
|
||||
.drama_score = 0.3,
|
||||
.llama_score = 0.6,
|
||||
.drama_thresh = 0.4,
|
||||
.llama_thresh = 0.8,
|
||||
.canned_negative_score = 0.3,
|
||||
.bazinga_score = 0.2,
|
||||
.joy_score = 0.3,
|
||||
.peace_score = 0.2,
|
||||
.bazinga_thresh = 0.5,
|
||||
.joy_thresh = 0.5,
|
||||
.peace_thresh = 0.5,
|
||||
.max_scoring_label = "Negative",
|
||||
.max_score = 0.1,
|
||||
/* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
|
||||
"Thresh3",
|
||||
/* custom_negative_score= */ 0.1,
|
||||
/* drama_score= */ 0.3,
|
||||
/* llama_score= */ 0.6,
|
||||
/* drama_thresh= */ 0.4,
|
||||
/* llama_thresh= */ 0.8,
|
||||
/* canned_negative_score= */ 0.3,
|
||||
/* bazinga_score= */ 0.2,
|
||||
/* joy_score= */ 0.3,
|
||||
/* peace_score= */ 0.2,
|
||||
/* bazinga_thresh= */ 0.5,
|
||||
/* joy_thresh= */ 0.5,
|
||||
/* peace_thresh= */ 0.5,
|
||||
/* max_scoring_label= */ "Negative",
|
||||
/* max_score= */ 0.1,
|
||||
},
|
||||
}),
|
||||
[](const testing::TestParamInfo<
|
||||
|
|
|
@ -117,24 +117,24 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
|
|||
INSTANTIATE_TEST_CASE_P(
|
||||
LandmarksToMatrixCalculatorTests, Landmarks2dToMatrixCalculatorTest,
|
||||
testing::ValuesIn<Landmarks2dToMatrixCalculatorTestCase>(
|
||||
{{.test_name = "TestWithOffset0",
|
||||
.base_offset = 0,
|
||||
.object_normalization_origin_offset = 0,
|
||||
.expected_cell_0_2 = 0.1f,
|
||||
.expected_cell_1_5 = 0.1875f,
|
||||
.rotation = 0},
|
||||
{.test_name = "TestWithOffset21",
|
||||
.base_offset = 21,
|
||||
.object_normalization_origin_offset = 0,
|
||||
.expected_cell_0_2 = 0.1f,
|
||||
.expected_cell_1_5 = 0.1875f,
|
||||
.rotation = 0},
|
||||
{.test_name = "TestWithRotation",
|
||||
.base_offset = 0,
|
||||
.object_normalization_origin_offset = 0,
|
||||
.expected_cell_0_2 = 0.075f,
|
||||
.expected_cell_1_5 = -0.25f,
|
||||
.rotation = M_PI / 2.0}}),
|
||||
{{/* test_name= */ "TestWithOffset0",
|
||||
/* base_offset= */ 0,
|
||||
/* object_normalization_origin_offset= */ 0,
|
||||
/* expected_cell_0_2= */ 0.1f,
|
||||
/* expected_cell_1_5= */ 0.1875f,
|
||||
/* rotation= */ 0},
|
||||
{/* test_name= */ "TestWithOffset21",
|
||||
/* base_offset= */ 21,
|
||||
/* object_normalization_origin_offset= */ 0,
|
||||
/* expected_cell_0_2= */ 0.1f,
|
||||
/* expected_cell_1_5= */ 0.1875f,
|
||||
/* rotation= */ 0},
|
||||
{/* test_name= */ "TestWithRotation",
|
||||
/* base_offset= */ 0,
|
||||
/* object_normalization_origin_offset= */ 0,
|
||||
/* expected_cell_0_2= */ 0.075f,
|
||||
/* expected_cell_1_5= */ -0.25f,
|
||||
/* rotation= */ M_PI / 2.0}}),
|
||||
[](const testing::TestParamInfo<
|
||||
Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
|
||||
return info.param.test_name;
|
||||
|
@ -203,30 +203,30 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
|
|||
INSTANTIATE_TEST_CASE_P(
|
||||
LandmarksToMatrixCalculatorTests, LandmarksWorld3dToMatrixCalculatorTest,
|
||||
testing::ValuesIn<LandmarksWorld3dToMatrixCalculatorTestCase>(
|
||||
{{.test_name = "TestWithOffset0",
|
||||
.base_offset = 0,
|
||||
.object_normalization_origin_offset = 0,
|
||||
.expected_cell_0_2 = 0.1f,
|
||||
.expected_cell_1_5 = 0.25,
|
||||
.rotation = 0},
|
||||
{.test_name = "TestWithOffset21",
|
||||
.base_offset = 21,
|
||||
.object_normalization_origin_offset = 0,
|
||||
.expected_cell_0_2 = 0.1f,
|
||||
.expected_cell_1_5 = 0.25,
|
||||
.rotation = 0},
|
||||
{.test_name = "NoObjectNormalization",
|
||||
.base_offset = 0,
|
||||
.object_normalization_origin_offset = -1,
|
||||
.expected_cell_0_2 = 0.021f,
|
||||
.expected_cell_1_5 = 0.052f,
|
||||
.rotation = 0},
|
||||
{.test_name = "TestWithRotation",
|
||||
.base_offset = 0,
|
||||
.object_normalization_origin_offset = 0,
|
||||
.expected_cell_0_2 = 0.1f,
|
||||
.expected_cell_1_5 = -0.25f,
|
||||
.rotation = M_PI / 2.0}}),
|
||||
{{/* test_name= */ "TestWithOffset0",
|
||||
/* base_offset= */ 0,
|
||||
/* object_normalization_origin_offset= */ 0,
|
||||
/* expected_cell_0_2= */ 0.1f,
|
||||
/* expected_cell_1_5= */ 0.25,
|
||||
/* rotation= */ 0},
|
||||
{/* test_name= */ "TestWithOffset21",
|
||||
/* base_offset= */ 21,
|
||||
/* object_normalization_origin_offset= */ 0,
|
||||
/* expected_cell_0_2= */ 0.1f,
|
||||
/* expected_cell_1_5= */ 0.25,
|
||||
/* rotation= */ 0},
|
||||
{/* test_name= */ "NoObjectNormalization",
|
||||
/* base_offset= */ 0,
|
||||
/* object_normalization_origin_offset= */ -1,
|
||||
/* expected_cell_0_2= */ 0.021f,
|
||||
/* expected_cell_1_5= */ 0.052f,
|
||||
/* rotation= */ 0},
|
||||
{/* test_name= */ "TestWithRotation",
|
||||
/* base_offset= */ 0,
|
||||
/* object_normalization_origin_offset= */ 0,
|
||||
/* expected_cell_0_2= */ 0.1f,
|
||||
/* expected_cell_1_5= */ -0.25f,
|
||||
/* rotation= */ M_PI / 2.0}}),
|
||||
[](const testing::TestParamInfo<
|
||||
LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
|
||||
return info.param.test_name;
|
||||
|
|
|
@ -257,19 +257,28 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
|||
preprocessed_tensors >> inference.In("TENSORS");
|
||||
auto model_output_tensors = inference.Out("TENSORS");
|
||||
|
||||
// TODO: support hand detection metadata.
|
||||
bool has_metadata = false;
|
||||
|
||||
// Generates a single side packet containing a vector of SSD anchors.
|
||||
auto& ssd_anchor = graph.AddNode("SsdAnchorsCalculator");
|
||||
ConfigureSsdAnchorsCalculator(
|
||||
&ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>());
|
||||
auto& ssd_anchor_options =
|
||||
ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>();
|
||||
if (!has_metadata) {
|
||||
ConfigureSsdAnchorsCalculator(&ssd_anchor_options);
|
||||
}
|
||||
auto anchors = ssd_anchor.SideOut("");
|
||||
|
||||
// Converts output tensors to Detections.
|
||||
auto& tensors_to_detections =
|
||||
graph.AddNode("TensorsToDetectionsCalculator");
|
||||
if (!has_metadata) {
|
||||
ConfigureTensorsToDetectionsCalculator(
|
||||
subgraph_options,
|
||||
&tensors_to_detections
|
||||
.GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
|
||||
}
|
||||
|
||||
model_output_tensors >> tensors_to_detections.In("TENSORS");
|
||||
anchors >> tensors_to_detections.SideIn("ANCHORS");
|
||||
auto detections = tensors_to_detections.Out("DETECTIONS");
|
||||
|
|
|
@ -148,6 +148,7 @@ cc_library(
|
|||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
|
||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
|
||||
"//mediapipe/util:graph_builder_utils",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
|
|
@ -14,6 +14,7 @@ limitations under the License.
|
|||
==============================================================================*/
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
@ -41,6 +42,7 @@ limitations under the License.
|
|||
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
||||
#include "mediapipe/util/graph_builder_utils.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace tasks {
|
||||
|
@ -53,7 +55,7 @@ using ::mediapipe::NormalizedRect;
|
|||
using ::mediapipe::api2::Input;
|
||||
using ::mediapipe::api2::Output;
|
||||
using ::mediapipe::api2::builder::Graph;
|
||||
using ::mediapipe::api2::builder::Source;
|
||||
using ::mediapipe::api2::builder::Stream;
|
||||
using ::mediapipe::tasks::components::utils::DisallowIf;
|
||||
using ::mediapipe::tasks::core::ModelAssetBundleResources;
|
||||
using ::mediapipe::tasks::metadata::SetExternalFile;
|
||||
|
@ -78,40 +80,46 @@ constexpr char kHandLandmarksDetectorTFLiteName[] =
|
|||
"hand_landmarks_detector.tflite";
|
||||
|
||||
struct HandLandmarkerOutputs {
|
||||
Source<std::vector<NormalizedLandmarkList>> landmark_lists;
|
||||
Source<std::vector<LandmarkList>> world_landmark_lists;
|
||||
Source<std::vector<NormalizedRect>> hand_rects_next_frame;
|
||||
Source<std::vector<ClassificationList>> handednesses;
|
||||
Source<std::vector<NormalizedRect>> palm_rects;
|
||||
Source<std::vector<Detection>> palm_detections;
|
||||
Source<Image> image;
|
||||
Stream<std::vector<NormalizedLandmarkList>> landmark_lists;
|
||||
Stream<std::vector<LandmarkList>> world_landmark_lists;
|
||||
Stream<std::vector<NormalizedRect>> hand_rects_next_frame;
|
||||
Stream<std::vector<ClassificationList>> handednesses;
|
||||
Stream<std::vector<NormalizedRect>> palm_rects;
|
||||
Stream<std::vector<Detection>> palm_detections;
|
||||
Stream<Image> image;
|
||||
};
|
||||
|
||||
// Sets the base options in the sub tasks.
|
||||
absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
|
||||
HandLandmarkerGraphOptions* options,
|
||||
bool is_copy) {
|
||||
ASSIGN_OR_RETURN(const auto hand_detector_file,
|
||||
resources.GetModelFile(kHandDetectorTFLiteName));
|
||||
auto* hand_detector_graph_options =
|
||||
options->mutable_hand_detector_graph_options();
|
||||
if (!hand_detector_graph_options->base_options().has_model_asset()) {
|
||||
ASSIGN_OR_RETURN(const auto hand_detector_file,
|
||||
resources.GetModelFile(kHandDetectorTFLiteName));
|
||||
SetExternalFile(hand_detector_file,
|
||||
hand_detector_graph_options->mutable_base_options()
|
||||
->mutable_model_asset(),
|
||||
is_copy);
|
||||
}
|
||||
hand_detector_graph_options->mutable_base_options()
|
||||
->mutable_acceleration()
|
||||
->CopyFrom(options->base_options().acceleration());
|
||||
hand_detector_graph_options->mutable_base_options()->set_use_stream_mode(
|
||||
options->base_options().use_stream_mode());
|
||||
ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
|
||||
resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
|
||||
auto* hand_landmarks_detector_graph_options =
|
||||
options->mutable_hand_landmarks_detector_graph_options();
|
||||
SetExternalFile(hand_landmarks_detector_file,
|
||||
if (!hand_landmarks_detector_graph_options->base_options()
|
||||
.has_model_asset()) {
|
||||
ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
|
||||
resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
|
||||
SetExternalFile(
|
||||
hand_landmarks_detector_file,
|
||||
hand_landmarks_detector_graph_options->mutable_base_options()
|
||||
->mutable_model_asset(),
|
||||
is_copy);
|
||||
}
|
||||
hand_landmarks_detector_graph_options->mutable_base_options()
|
||||
->mutable_acceleration()
|
||||
->CopyFrom(options->base_options().acceleration());
|
||||
|
@ -119,7 +127,6 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
|
|||
->set_use_stream_mode(options->base_options().use_stream_mode());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
|
||||
|
@ -219,12 +226,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
|||
!sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
|
||||
.IsAvailable()));
|
||||
}
|
||||
Stream<Image> image_in = graph.In(kImageTag).Cast<Image>();
|
||||
std::optional<Stream<NormalizedRect>> norm_rect_in;
|
||||
if (HasInput(sc->OriginalNode(), kNormRectTag)) {
|
||||
norm_rect_in = graph.In(kNormRectTag).Cast<NormalizedRect>();
|
||||
}
|
||||
ASSIGN_OR_RETURN(
|
||||
auto hand_landmarker_outputs,
|
||||
BuildHandLandmarkerGraph(
|
||||
sc->Options<HandLandmarkerGraphOptions>(),
|
||||
graph[Input<Image>(kImageTag)],
|
||||
graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));
|
||||
BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
|
||||
image_in, norm_rect_in, graph));
|
||||
hand_landmarker_outputs.landmark_lists >>
|
||||
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
||||
hand_landmarker_outputs.world_landmark_lists >>
|
||||
|
@ -262,8 +272,8 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
|||
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
|
||||
// graph: the mediapipe graph instance to be updated.
|
||||
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
|
||||
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
|
||||
Source<NormalizedRect> norm_rect_in, Graph& graph) {
|
||||
const HandLandmarkerGraphOptions& tasks_options, Stream<Image> image_in,
|
||||
std::optional<Stream<NormalizedRect>> norm_rect_in, Graph& graph) {
|
||||
const int max_num_hands =
|
||||
tasks_options.hand_detector_graph_options().num_hands();
|
||||
|
||||
|
@ -293,10 +303,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
|||
// track the hands from the last frame.
|
||||
auto image_for_hand_detector =
|
||||
DisallowIf(image_in, has_enough_hands, graph);
|
||||
auto norm_rect_in_for_hand_detector =
|
||||
DisallowIf(norm_rect_in, has_enough_hands, graph);
|
||||
std::optional<Stream<NormalizedRect>> norm_rect_in_for_hand_detector;
|
||||
if (norm_rect_in) {
|
||||
norm_rect_in_for_hand_detector =
|
||||
DisallowIf(norm_rect_in.value(), has_enough_hands, graph);
|
||||
}
|
||||
image_for_hand_detector >> hand_detector.In("IMAGE");
|
||||
norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT");
|
||||
if (norm_rect_in_for_hand_detector) {
|
||||
norm_rect_in_for_hand_detector.value() >> hand_detector.In("NORM_RECT");
|
||||
}
|
||||
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
||||
auto& hand_association = graph.AddNode("HandAssociationCalculator");
|
||||
hand_association.GetOptions<HandAssociationCalculatorOptions>()
|
||||
|
@ -313,7 +328,9 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
|||
// series, and we don't want to enable the tracking and hand associations
|
||||
// between input images. Always use the hand detector graph.
|
||||
image_in >> hand_detector.In("IMAGE");
|
||||
norm_rect_in >> hand_detector.In("NORM_RECT");
|
||||
if (norm_rect_in) {
|
||||
norm_rect_in.value() >> hand_detector.In("NORM_RECT");
|
||||
}
|
||||
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
||||
hand_rects_from_hand_detector >> clip_hand_rects.In("");
|
||||
}
|
||||
|
|
|
@ -34,16 +34,14 @@ objc_library(
|
|||
data = [
|
||||
"//mediapipe/tasks/testdata/vision:test_images",
|
||||
],
|
||||
sdk_frameworks = [
|
||||
"CoreMedia",
|
||||
"CoreVideo",
|
||||
"CoreGraphics",
|
||||
"UIKit",
|
||||
"Accelerate",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/tasks/ios/common:MPPCommon",
|
||||
"//mediapipe/tasks/ios/vision/core:MPPImage",
|
||||
"//third_party/apple_frameworks:Accelerate",
|
||||
"//third_party/apple_frameworks:CoreGraphics",
|
||||
"//third_party/apple_frameworks:CoreMedia",
|
||||
"//third_party/apple_frameworks:CoreVideo",
|
||||
"//third_party/apple_frameworks:UIKit",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
@ -11,11 +11,6 @@ objc_library(
|
|||
"-std=c++17",
|
||||
],
|
||||
module_name = "MPPImage",
|
||||
sdk_frameworks = [
|
||||
"CoreMedia",
|
||||
"CoreVideo",
|
||||
"UIKit",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/tasks/ios/common:MPPCommon",
|
||||
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#include "mediapipe/framework/packet.h"
|
||||
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
|
||||
|
||||
/**
|
||||
* This class helps create various kinds of packets for MediaPipe Vision Tasks.
|
||||
*/
|
||||
@interface MPPVisionPacketCreator : NSObject
|
||||
|
||||
/**
 * Creates a MediaPipe packet that wraps the given `MPPImage`.
 *
 * @param image The image to place in the packet.
 * @param error An optional error parameter that is forwarded to the image conversion.
 *
 * @return A packet holding a `mediapipe::Image` built from `image`, or an empty
 * `mediapipe::Packet` if the image could not be converted.
 */
+ (mediapipe::Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error;
|
||||
|
||||
@end
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
|
||||
#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h"
|
||||
|
||||
#include "mediapipe/framework/formats/image.h"
|
||||
|
||||
namespace {
|
||||
using ::mediapipe::Image;
|
||||
using ::mediapipe::ImageFrame;
|
||||
using ::mediapipe::MakePacket;
|
||||
using ::mediapipe::Packet;
|
||||
} // namespace
|
||||
|
||||
// Deleter functor that hands the pointer it receives to free().
struct freeDeleter {
  void operator()(void *memory) {
    free(memory);
  }
};
|
||||
|
||||
@implementation MPPVisionPacketCreator
|
||||
|
||||
// Converts `image` into a `mediapipe::ImageFrame` and wraps it in a packet holding a
// `mediapipe::Image`. Returns an empty `Packet` when the conversion yields no image frame; `error`
// is forwarded unchanged to the conversion call.
+ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error {
|
||||
std::unique_ptr<ImageFrame> imageFrame = [image imageFrameWithError:error];
|
||||
|
||||
// A null frame means the conversion failed; signal that to the caller with an empty packet.
if (!imageFrame) {
|
||||
return Packet();
|
||||
}
|
||||
|
||||
// Ownership of the frame is moved into the `Image` stored in the packet.
return MakePacket<Image>(std::move(imageFrame));
|
||||
}
|
||||
|
||||
@end
|
|
@ -4,23 +4,22 @@ licenses(["notice"])
|
|||
|
||||
objc_library(
|
||||
name = "MPPImageUtils",
|
||||
srcs = ["sources/MPPImage+Utils.m"],
|
||||
srcs = ["sources/MPPImage+Utils.mm"],
|
||||
hdrs = ["sources/MPPImage+Utils.h"],
|
||||
copts = [
|
||||
"-ObjC++",
|
||||
"-std=c++17",
|
||||
],
|
||||
module_name = "MPPImageUtils",
|
||||
sdk_frameworks = [
|
||||
"Accelerate",
|
||||
"CoreGraphics",
|
||||
"CoreImage",
|
||||
"CoreVideo",
|
||||
"UIKit",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/framework/formats:image_format_cc_proto",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/tasks/ios/common:MPPCommon",
|
||||
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
|
||||
"//mediapipe/tasks/ios/vision/core:MPPImage",
|
||||
"//third_party/apple_frameworks:UIKit",
|
||||
"//third_party/apple_frameworks:Accelerate",
|
||||
"//third_party/apple_frameworks:CoreGraphics",
|
||||
"//third_party/apple_frameworks:CoreImage",
|
||||
"//third_party/apple_frameworks:CoreVideo",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -14,30 +14,27 @@
|
|||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/**
|
||||
* Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library.
|
||||
* Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`.
|
||||
*/
|
||||
@interface MPPImage (Utils)
|
||||
|
||||
/** Bitmap size of the image. */
|
||||
@property(nonatomic, readonly) CGSize bitmapSize;
|
||||
|
||||
/**
|
||||
* Returns the underlying uint8 pixel buffer of an `MPPImage`.
|
||||
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel
|
||||
* data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated
|
||||
* in the R, G, B channels.
|
||||
* Converts the `MPPImage` into a `mediapipe::ImageFrame`.
|
||||
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is
|
||||
* converted to an RGB format. In case of grayscale images, the mono channel is duplicated in the R,
|
||||
* G, B channels.
|
||||
*
|
||||
* @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
|
||||
* error will be saved.
|
||||
*
|
||||
* @return The underlying pixel buffer of the `MPPImage` or nil in case of errors.
|
||||
* @return A std::unique_ptr<mediapipe::ImageFrame> or `nullptr` in case of errors.
|
||||
*/
|
||||
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error;
|
||||
- (std::unique_ptr<mediapipe::ImageFrame>)imageFrameWithError:(NSError **)error;
|
||||
|
||||
@end
|
||||
|
||||
|
|
|
@ -22,6 +22,12 @@
|
|||
#import <CoreImage/CoreImage.h>
|
||||
#import <CoreVideo/CoreVideo.h>
|
||||
|
||||
#include "mediapipe/framework/formats/image_format.pb.h"
|
||||
|
||||
namespace {
|
||||
using ::mediapipe::ImageFrame;
|
||||
}
|
||||
|
||||
@interface MPPPixelDataUtils : NSObject
|
||||
|
||||
+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
|
||||
|
@ -35,21 +41,20 @@
|
|||
|
||||
@interface MPPCVPixelBufferUtils : NSObject
|
||||
|
||||
+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error;
|
||||
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||
error:(NSError **)error;
|
||||
|
||||
@end
|
||||
|
||||
@interface MPPCGImageUtils : NSObject
|
||||
|
||||
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
|
||||
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
|
||||
|
||||
@end
|
||||
|
||||
@interface UIImage (RawPixelDataUtils)
|
||||
@interface UIImage (ImageFrameUtils)
|
||||
|
||||
@property(nonatomic, readonly) CGSize bitmapSize;
|
||||
|
||||
- (uint8_t *)pixelDataWithError:(NSError **)error;
|
||||
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error;
|
||||
|
||||
@end
|
||||
|
||||
|
@ -120,9 +125,14 @@
|
|||
|
||||
@implementation MPPCVPixelBufferUtils
|
||||
|
||||
+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error {
|
||||
+ (std::unique_ptr<ImageFrame>)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||
error:(NSError **)error {
|
||||
CVPixelBufferLockBaseAddress(pixelBuffer, 0);
|
||||
|
||||
size_t width = CVPixelBufferGetWidth(pixelBuffer);
|
||||
size_t height = CVPixelBufferGetHeight(pixelBuffer);
|
||||
size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer);
|
||||
|
||||
uint8_t *rgbPixelData = [MPPPixelDataUtils
|
||||
rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
|
||||
withWidth:CVPixelBufferGetWidth(pixelBuffer)
|
||||
|
@ -133,19 +143,24 @@
|
|||
|
||||
CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
|
||||
|
||||
return rgbPixelData;
|
||||
if (!rgbPixelData) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::unique_ptr<ImageFrame> imageFrame = absl::make_unique<ImageFrame>(
|
||||
::mediapipe::ImageFormat::SRGB, width, height, stride, static_cast<uint8 *>(rgbPixelData),
|
||||
/*deleter=*/free);
|
||||
|
||||
return imageFrame;
|
||||
}
|
||||
|
||||
+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||
error:(NSError **)error {
|
||||
uint8_t *pixelData = NULL;
|
||||
|
||||
OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
|
||||
|
||||
switch (pixelBufferFormat) {
|
||||
case kCVPixelFormatType_32BGRA: {
|
||||
pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error];
|
||||
break;
|
||||
return [MPPCVPixelBufferUtils rgbImageFrameFromCVPixelBuffer:pixelBuffer error:error];
|
||||
}
|
||||
default: {
|
||||
[MPPCommonUtils createCustomError:error
|
||||
|
@ -155,20 +170,20 @@
|
|||
}
|
||||
}
|
||||
|
||||
return pixelData;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@implementation MPPCGImageUtils
|
||||
|
||||
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
|
||||
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
|
||||
size_t width = CGImageGetWidth(cgImage);
|
||||
size_t height = CGImageGetHeight(cgImage);
|
||||
|
||||
NSInteger bitsPerComponent = 8;
|
||||
NSInteger channelCount = 4;
|
||||
UInt8 *pixel_data_to_return = NULL;
|
||||
UInt8 *pixelDataToReturn = NULL;
|
||||
|
||||
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
|
||||
size_t bytesPerRow = channelCount * width;
|
||||
|
@ -191,7 +206,7 @@
|
|||
if (srcData) {
|
||||
// We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input
|
||||
// a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
|
||||
pixel_data_to_return = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
|
||||
pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
|
||||
withWidth:width
|
||||
height:height
|
||||
stride:bytesPerRow
|
||||
|
@ -204,38 +219,38 @@
|
|||
|
||||
CGColorSpaceRelease(colorSpace);
|
||||
|
||||
return pixel_data_to_return;
|
||||
std::unique_ptr<ImageFrame> imageFrame =
|
||||
absl::make_unique<ImageFrame>(mediapipe::ImageFormat::SRGB, (int)width, (int)height,
|
||||
(int)bytesPerRow, static_cast<uint8 *>(pixelDataToReturn),
|
||||
/*deleter=*/free);
|
||||
|
||||
return imageFrame;
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@implementation UIImage (RawPixelDataUtils)
|
||||
|
||||
- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error {
|
||||
uint8_t *pixelData = NULL;
|
||||
@implementation UIImage (ImageFrameUtils)
|
||||
|
||||
- (std::unique_ptr<ImageFrame>)imageFrameFromCIImageWithError:(NSError **)error {
|
||||
if (self.CIImage.pixelBuffer) {
|
||||
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer
|
||||
error:error];
|
||||
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error];
|
||||
|
||||
} else if (self.CIImage.CGImage) {
|
||||
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error];
|
||||
return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error];
|
||||
} else {
|
||||
[MPPCommonUtils createCustomError:error
|
||||
withCode:MPPTasksErrorCodeInvalidArgumentError
|
||||
description:@"CIImage should have CGImage or CVPixelBuffer info."];
|
||||
}
|
||||
|
||||
return pixelData;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
- (uint8_t *)pixelDataWithError:(NSError **)error {
|
||||
uint8_t *pixelData = nil;
|
||||
|
||||
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
|
||||
if (self.CGImage) {
|
||||
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error];
|
||||
return [MPPCGImageUtils imageFrameFromCGImage:self.CGImage error:error];
|
||||
} else if (self.CIImage) {
|
||||
pixelData = [self pixelDataFromCIImageWithError:error];
|
||||
return [self imageFrameFromCIImageWithError:error];
|
||||
} else {
|
||||
[MPPCommonUtils createCustomError:error
|
||||
withCode:MPPTasksErrorCodeInvalidArgumentError
|
||||
|
@ -243,46 +258,24 @@
|
|||
" CIImage or CGImage."];
|
||||
}
|
||||
|
||||
return pixelData;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
- (CGSize)bitmapSize {
|
||||
CGFloat width = 0;
|
||||
CGFloat height = 0;
|
||||
|
||||
if (self.CGImage) {
|
||||
width = CGImageGetWidth(self.CGImage);
|
||||
height = CGImageGetHeight(self.CGImage);
|
||||
} else if (self.CIImage.pixelBuffer) {
|
||||
width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer);
|
||||
height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer);
|
||||
} else if (self.CIImage.CGImage) {
|
||||
width = CGImageGetWidth(self.CIImage.CGImage);
|
||||
height = CGImageGetHeight(self.CIImage.CGImage);
|
||||
}
|
||||
return CGSizeMake(width, height);
|
||||
}
|
||||
@end
|
||||
|
||||
@implementation MPPImage (Utils)
|
||||
|
||||
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error {
|
||||
uint8_t *pixelData = NULL;
|
||||
|
||||
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
|
||||
switch (self.imageSourceType) {
|
||||
case MPPImageSourceTypeSampleBuffer: {
|
||||
CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
|
||||
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:sampleImagePixelBuffer
|
||||
error:error];
|
||||
break;
|
||||
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error];
|
||||
}
|
||||
case MPPImageSourceTypePixelBuffer: {
|
||||
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error];
|
||||
break;
|
||||
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error];
|
||||
}
|
||||
case MPPImageSourceTypeImage: {
|
||||
pixelData = [self.image pixelDataWithError:error];
|
||||
break;
|
||||
return [self.image imageFrameWithError:error];
|
||||
}
|
||||
default:
|
||||
[MPPCommonUtils createCustomError:error
|
||||
|
@ -290,35 +283,7 @@
|
|||
description:@"Invalid source type for MPPImage."];
|
||||
}
|
||||
|
||||
return pixelData;
|
||||
}
|
||||
|
||||
- (CGSize)bitmapSize {
|
||||
CGFloat width = 0;
|
||||
CGFloat height = 0;
|
||||
|
||||
switch (self.imageSourceType) {
|
||||
case MPPImageSourceTypeSampleBuffer: {
|
||||
CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
|
||||
width = CVPixelBufferGetWidth(pixelBuffer);
|
||||
height = CVPixelBufferGetHeight(pixelBuffer);
|
||||
break;
|
||||
}
|
||||
case MPPImageSourceTypePixelBuffer: {
|
||||
width = CVPixelBufferGetWidth(self.pixelBuffer);
|
||||
height = CVPixelBufferGetHeight(self.pixelBuffer);
|
||||
break;
|
||||
}
|
||||
case MPPImageSourceTypeImage: {
|
||||
width = self.image.bitmapSize.width;
|
||||
height = self.image.bitmapSize.height;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return CGSizeMake(width, height);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@end
|
38
mediapipe/tasks/ios/vision/image_classifier/BUILD
Normal file
38
mediapipe/tasks/ios/vision/image_classifier/BUILD
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
# Objective-C library for the image classifier result type. Depends on the shared classification
# result container and the task result base target.
objc_library(
|
||||
name = "MPPImageClassifierResult",
|
||||
srcs = ["sources/MPPImageClassifierResult.m"],
|
||||
hdrs = ["sources/MPPImageClassifierResult.h"],
|
||||
deps = [
|
||||
"//mediapipe/tasks/ios/components/containers:MPPClassificationResult",
|
||||
"//mediapipe/tasks/ios/core:MPPTaskResult",
|
||||
],
|
||||
)
|
||||
|
||||
# Objective-C library for the image classifier options type. Depends on the result target in this
# package (referenced by the options' completion callback), the task options base target and the
# vision running mode target.
objc_library(
|
||||
name = "MPPImageClassifierOptions",
|
||||
srcs = ["sources/MPPImageClassifierOptions.m"],
|
||||
hdrs = ["sources/MPPImageClassifierOptions.h"],
|
||||
deps = [
|
||||
":MPPImageClassifierResult",
|
||||
"//mediapipe/tasks/ios/core:MPPTaskOptions",
|
||||
"//mediapipe/tasks/ios/vision/core:MPPRunningMode",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright 2023 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h"
|
||||
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
|
||||
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h"
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/**
|
||||
* Options for setting up a `MPPImageClassifier`.
|
||||
*/
|
||||
NS_SWIFT_NAME(ImageClassifierOptions)
|
||||
@interface MPPImageClassifierOptions : MPPTaskOptions <NSCopying>
|
||||
|
||||
/**
 * The running mode in which the image classifier operates.
 * NOTE(review): the default value is not visible in this header; confirm against the
 * implementation.
 */
@property(nonatomic) MPPRunningMode runningMode;
|
||||
|
||||
/**
|
||||
* The user-defined result callback for processing live stream data. The result callback should only
|
||||
* be specified when the running mode is set to the live stream mode.
* NOTE(review): this header is inside an NS_ASSUME_NONNULL region, so the block and its `result`
* and `error` arguments are implicitly nonnull; confirm that is the intended nullability.
|
||||
*/
|
||||
@property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error);
|
||||
|
||||
/**
|
||||
* The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults
|
||||
* to English.
|
||||
*/
|
||||
@property(nonatomic, copy) NSString *displayNamesLocale;
|
||||
|
||||
/**
|
||||
* The maximum number of top-scored classification results to return. If < 0, all available results
|
||||
* will be returned. If 0, an invalid argument error is returned.
|
||||
*/
|
||||
@property(nonatomic) NSInteger maxResults;
|
||||
|
||||
/**
|
||||
* Score threshold to override the one provided in the model metadata (if any). Results below this
|
||||
* value are rejected.
|
||||
*/
|
||||
@property(nonatomic) float scoreThreshold;
|
||||
|
||||
/**
|
||||
* The allowlist of category names. If non-empty, classification results whose category name is not
|
||||
* in this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
|
||||
* exclusive with categoryDenylist.
|
||||
*/
|
||||
@property(nonatomic, copy) NSArray<NSString *> *categoryAllowlist;
|
||||
|
||||
/**
|
||||
* The denylist of category names. If non-empty, classification results whose category name is in
|
||||
* this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
|
||||
* exclusive with categoryAllowlist.
|
||||
*/
|
||||
@property(nonatomic, copy) NSArray<NSString *> *categoryDenylist;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user