Merge branch 'google:master' into master
This commit is contained in:
commit
6e7018b826
11
README.md
11
README.md
|
@ -19,6 +19,17 @@ ML solutions for live and streaming media.
|
||||||
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
|
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
|
||||||
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
|
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||||
|
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||||
|
as the primary developer documentation
|
||||||
|
site for MediaPipe starting April 3, 2023.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## ML solutions in MediaPipe
|
## ML solutions in MediaPipe
|
||||||
|
|
||||||
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
|
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
|
||||||
|
|
13
docs/_layouts/forward.html
Normal file
13
docs/_layouts/forward.html
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<meta http-equiv="refresh" content="0;url={{ page.target }}"/>
|
||||||
|
<link rel="canonical" href="{{ page.target }}"/>
|
||||||
|
<title>Redirecting</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>This page now lives on https://developers.google.com/mediapipe/. If you aren't automatically
|
||||||
|
redirected, follow this
|
||||||
|
<a href="{{ page.target }}">link</a>.</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -593,3 +593,105 @@ CalculatorGraphConfig BuildGraph() {
|
||||||
return graph.GetConfig();
|
return graph.GetConfig();
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Separate nodes for better readability
|
||||||
|
|
||||||
|
```c++ {.bad}
|
||||||
|
CalculatorGraphConfig BuildGraph() {
|
||||||
|
Graph graph;
|
||||||
|
|
||||||
|
// Inputs.
|
||||||
|
Stream<A> a = graph.In(0).Cast<A>();
|
||||||
|
auto& node1 = graph.AddNode("Calculator1");
|
||||||
|
a.ConnectTo(node1.In("INPUT"));
|
||||||
|
Stream<B> b = node1.Out("OUTPUT").Cast<B>();
|
||||||
|
auto& node2 = graph.AddNode("Calculator2");
|
||||||
|
b.ConnectTo(node2.In("INPUT"));
|
||||||
|
Stream<C> c = node2.Out("OUTPUT").Cast<C>();
|
||||||
|
auto& node3 = graph.AddNode("Calculator3");
|
||||||
|
b.ConnectTo(node3.In("INPUT_B"));
|
||||||
|
c.ConnectTo(node3.In("INPUT_C"));
|
||||||
|
Stream<D> d = node3.Out("OUTPUT").Cast<D>();
|
||||||
|
auto& node4 = graph.AddNode("Calculator4");
|
||||||
|
b.ConnectTo(node4.In("INPUT_B"));
|
||||||
|
c.ConnectTo(node4.In("INPUT_C"));
|
||||||
|
d.ConnectTo(node4.In("INPUT_D"));
|
||||||
|
Stream<E> e = node4.Out("OUTPUT").Cast<E>();
|
||||||
|
// Outputs.
|
||||||
|
b.SetName("b").ConnectTo(graph.Out(0));
|
||||||
|
c.SetName("c").ConnectTo(graph.Out(1));
|
||||||
|
d.SetName("d").ConnectTo(graph.Out(2));
|
||||||
|
e.SetName("e").ConnectTo(graph.Out(3));
|
||||||
|
|
||||||
|
return graph.GetConfig();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
In the above code, it can be hard to tell where each node begins and
|
||||||
|
ends. To improve this and help your code readers, you can simply have blank
|
||||||
|
lines before and after each node:
|
||||||
|
|
||||||
|
```c++ {.good}
|
||||||
|
CalculatorGraphConfig BuildGraph() {
|
||||||
|
Graph graph;
|
||||||
|
|
||||||
|
// Inputs.
|
||||||
|
Stream<A> a = graph.In(0).Cast<A>();
|
||||||
|
|
||||||
|
auto& node1 = graph.AddNode("Calculator1");
|
||||||
|
a.ConnectTo(node1.In("INPUT"));
|
||||||
|
Stream<B> b = node1.Out("OUTPUT").Cast<B>();
|
||||||
|
|
||||||
|
auto& node2 = graph.AddNode("Calculator2");
|
||||||
|
b.ConnectTo(node2.In("INPUT"));
|
||||||
|
Stream<C> c = node2.Out("OUTPUT").Cast<C>();
|
||||||
|
|
||||||
|
auto& node3 = graph.AddNode("Calculator3");
|
||||||
|
b.ConnectTo(node3.In("INPUT_B"));
|
||||||
|
c.ConnectTo(node3.In("INPUT_C"));
|
||||||
|
Stream<D> d = node3.Out("OUTPUT").Cast<D>();
|
||||||
|
|
||||||
|
auto& node4 = graph.AddNode("Calculator4");
|
||||||
|
b.ConnectTo(node4.In("INPUT_B"));
|
||||||
|
c.ConnectTo(node4.In("INPUT_C"));
|
||||||
|
d.ConnectTo(node4.In("INPUT_D"));
|
||||||
|
Stream<E> e = node4.Out("OUTPUT").Cast<E>();
|
||||||
|
|
||||||
|
// Outputs.
|
||||||
|
b.SetName("b").ConnectTo(graph.Out(0));
|
||||||
|
c.SetName("c").ConnectTo(graph.Out(1));
|
||||||
|
d.SetName("d").ConnectTo(graph.Out(2));
|
||||||
|
e.SetName("e").ConnectTo(graph.Out(3));
|
||||||
|
|
||||||
|
return graph.GetConfig();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Also, the above layout matches the `CalculatorGraphConfig` proto
|
||||||
|
representation better.
|
||||||
|
|
||||||
|
If you extract nodes into utility functions, they are scoped within functions
|
||||||
|
already and it's clear where they begin and end, so it's completely fine to
|
||||||
|
have:
|
||||||
|
|
||||||
|
```c++ {.good}
|
||||||
|
CalculatorGraphConfig BuildGraph() {
|
||||||
|
Graph graph;
|
||||||
|
|
||||||
|
// Inputs.
|
||||||
|
Stream<A> a = graph.In(0).Cast<A>();
|
||||||
|
|
||||||
|
Stream<B> b = RunCalculator1(a, graph);
|
||||||
|
Stream<C> c = RunCalculator2(b, graph);
|
||||||
|
Stream<D> d = RunCalculator3(b, c, graph);
|
||||||
|
Stream<E> e = RunCalculator4(b, c, d, graph);
|
||||||
|
|
||||||
|
// Outputs.
|
||||||
|
b.SetName("b").ConnectTo(graph.Out(0));
|
||||||
|
c.SetName("c").ConnectTo(graph.Out(1));
|
||||||
|
d.SetName("d").ConnectTo(graph.Out(2));
|
||||||
|
e.SetName("e").ConnectTo(graph.Out(3));
|
||||||
|
|
||||||
|
return graph.GetConfig();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/calculators
|
||||||
title: Calculators
|
title: Calculators
|
||||||
parent: Framework Concepts
|
parent: Framework Concepts
|
||||||
nav_order: 1
|
nav_order: 1
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/overview
|
||||||
title: Framework Concepts
|
title: Framework Concepts
|
||||||
nav_order: 5
|
nav_order: 5
|
||||||
has_children: true
|
has_children: true
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/gpu
|
||||||
title: GPU
|
title: GPU
|
||||||
parent: Framework Concepts
|
parent: Framework Concepts
|
||||||
nav_order: 5
|
nav_order: 5
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/graphs
|
||||||
title: Graphs
|
title: Graphs
|
||||||
parent: Framework Concepts
|
parent: Framework Concepts
|
||||||
nav_order: 2
|
nav_order: 2
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/packets
|
||||||
title: Packets
|
title: Packets
|
||||||
parent: Framework Concepts
|
parent: Framework Concepts
|
||||||
nav_order: 3
|
nav_order: 3
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/realtime_streams
|
||||||
title: Real-time Streams
|
title: Real-time Streams
|
||||||
parent: Framework Concepts
|
parent: Framework Concepts
|
||||||
nav_order: 6
|
nav_order: 6
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/framework_concepts/synchronization
|
||||||
title: Synchronization
|
title: Synchronization
|
||||||
parent: Framework Concepts
|
parent: Framework Concepts
|
||||||
nav_order: 4
|
nav_order: 4
|
||||||
|
|
|
@ -13,6 +13,17 @@ nav_order: 2
|
||||||
{:toc}
|
{:toc}
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||||
|
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||||
|
as the primary developer documentation
|
||||||
|
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||||
|
the new site, but will remain available in the source code repository on an
|
||||||
|
as-is basis.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
MediaPipe Android Solution APIs (currently in alpha) are available in:
|
MediaPipe Android Solution APIs (currently in alpha) are available in:
|
||||||
|
|
||||||
* [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)
|
* [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)
|
||||||
|
|
|
@ -12,6 +12,17 @@ nav_exclude: true
|
||||||
{:toc}
|
{:toc}
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||||
|
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||||
|
as the primary developer documentation
|
||||||
|
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||||
|
the new site, but will remain available in the source code repository on an
|
||||||
|
as-is basis.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
### Android
|
### Android
|
||||||
|
|
||||||
Please see these [instructions](./android.md).
|
Please see these [instructions](./android.md).
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/getting_started/faq
|
||||||
title: FAQ
|
title: FAQ
|
||||||
parent: Getting Started
|
parent: Getting Started
|
||||||
nav_order: 9
|
nav_order: 9
|
||||||
|
@ -59,7 +60,7 @@ The second approach allows up to [`max_in_flight`] invocations of the
|
||||||
packets from [`CalculatorBase::Process`] are automatically ordered by timestamp
|
packets from [`CalculatorBase::Process`] are automatically ordered by timestamp
|
||||||
before they are passed along to downstream calculators.
|
before they are passed along to downstream calculators.
|
||||||
|
|
||||||
With either aproach, you must be aware that the calculator running in parallel
|
With either approach, you must be aware that the calculator running in parallel
|
||||||
cannot maintain internal state in the same way as a normal sequential
|
cannot maintain internal state in the same way as a normal sequential
|
||||||
calculator.
|
calculator.
|
||||||
|
|
||||||
|
|
|
@ -11,3 +11,14 @@ has_children: true
|
||||||
1. TOC
|
1. TOC
|
||||||
{:toc}
|
{:toc}
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||||
|
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||||
|
as the primary developer documentation
|
||||||
|
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||||
|
the new site, but will remain available in the source code repository on an
|
||||||
|
as-is basis.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/getting_started/gpu_support
|
||||||
title: GPU Support
|
title: GPU Support
|
||||||
parent: Getting Started
|
parent: Getting Started
|
||||||
nav_order: 7
|
nav_order: 7
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/getting_started/help
|
||||||
title: Getting Help
|
title: Getting Help
|
||||||
parent: Getting Started
|
parent: Getting Started
|
||||||
nav_order: 8
|
nav_order: 8
|
||||||
|
@ -37,8 +38,8 @@ If you open a GitHub issue, here is our policy:
|
||||||
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
|
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
|
||||||
- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
|
- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
|
||||||
- **Bazel version**:
|
- **Bazel version**:
|
||||||
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev enviroment)**:
|
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev environment)**:
|
||||||
- **Xcode & Tulsi version (if issue is related to building in mobile dev enviroment)**:
|
- **Xcode & Tulsi version (if issue is related to building in mobile dev environment)**:
|
||||||
- **Exact steps to reproduce**:
|
- **Exact steps to reproduce**:
|
||||||
|
|
||||||
### Describe the problem
|
### Describe the problem
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/getting_started/install
|
||||||
title: Installation
|
title: Installation
|
||||||
parent: Getting Started
|
parent: Getting Started
|
||||||
nav_order: 6
|
nav_order: 6
|
||||||
|
|
|
@ -12,6 +12,17 @@ nav_order: 4
|
||||||
{:toc}
|
{:toc}
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||||
|
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||||
|
as the primary developer documentation
|
||||||
|
site for MediaPipe starting April 3, 2023. This content will not be moved to
|
||||||
|
the new site, but will remain available in the source code repository on an
|
||||||
|
as-is basis.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Ready-to-use JavaScript Solutions
|
## Ready-to-use JavaScript Solutions
|
||||||
|
|
||||||
MediaPipe currently offers the following solutions:
|
MediaPipe currently offers the following solutions:
|
||||||
|
@ -33,7 +44,7 @@ snippets.
|
||||||
|
|
||||||
| Browser | Platform | Notes |
|
| Browser | Platform | Notes |
|
||||||
| ------- | ----------------------- | -------------------------------------- |
|
| ------- | ----------------------- | -------------------------------------- |
|
||||||
| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuschia |
|
| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia |
|
||||||
| | | unsupported. |
|
| | | unsupported. |
|
||||||
| Chrome | iOS | Camera unavailable in Chrome on iOS. |
|
| Chrome | iOS | Camera unavailable in Chrome on iOS. |
|
||||||
| Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / |
|
| Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / |
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/getting_started/troubleshooting
|
||||||
title: Troubleshooting
|
title: Troubleshooting
|
||||||
parent: Getting Started
|
parent: Getting Started
|
||||||
nav_order: 10
|
nav_order: 10
|
||||||
|
@ -65,7 +66,7 @@ WARNING: Download from https://storage.googleapis.com/mirror.tensorflow.org/gith
|
||||||
```
|
```
|
||||||
|
|
||||||
usually indicates that Bazel fails to download necessary dependency repositories
|
usually indicates that Bazel fails to download necessary dependency repositories
|
||||||
that MediaPipe needs. MedaiPipe has several dependency repositories that are
|
that MediaPipe needs. MediaPipe has several dependency repositories that are
|
||||||
hosted by Google sites. In some regions, you may need to set up a network proxy
|
hosted by Google sites. In some regions, you may need to set up a network proxy
|
||||||
or use a VPN to access those resources. You may also need to append
|
or use a VPN to access those resources. You may also need to append
|
||||||
`--host_jvm_args "-DsocksProxyHost=<ip address> -DsocksProxyPort=<port number>"`
|
`--host_jvm_args "-DsocksProxyHost=<ip address> -DsocksProxyPort=<port number>"`
|
||||||
|
|
|
@ -19,6 +19,17 @@ ML solutions for live and streaming media.
|
||||||
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
|
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
|
||||||
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
|
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
**Attention:** *Thanks for your interest in MediaPipe! We are moving to
|
||||||
|
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
|
||||||
|
as the primary developer documentation
|
||||||
|
site for MediaPipe starting April 3, 2023.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## ML solutions in MediaPipe
|
## ML solutions in MediaPipe
|
||||||
|
|
||||||
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
|
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 14
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This
|
AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 10
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
MediaPipe Box Tracking has been powering real-time tracking in
|
MediaPipe Box Tracking has been powering real-time tracking in
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 1
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
MediaPipe Face Detection is an ultrafast face detection solution that comes with
|
MediaPipe Face Detection is an ultrafast face detection solution that comes with
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 2
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
|
MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
|
||||||
|
@ -133,7 +143,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
|
||||||
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
|
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
|
||||||
detection in the screen coordinate space: the X- and Y- coordinates are
|
detection in the screen coordinate space: the X- and Y- coordinates are
|
||||||
normalized screen coordinates, while the Z coordinate is relative and is scaled
|
normalized screen coordinates, while the Z coordinate is relative and is scaled
|
||||||
as the X coodinate under the
|
as the X coordinate under the
|
||||||
[weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection).
|
[weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection).
|
||||||
This format is well-suited for some applications, however it does not directly
|
This format is well-suited for some applications, however it does not directly
|
||||||
enable the full spectrum of augmented reality (AR) features like aligning a
|
enable the full spectrum of augmented reality (AR) features like aligning a
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 8
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
![hair_segmentation_android_gpu_gif](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu.gif)
|
![hair_segmentation_android_gpu_gif](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu.gif)
|
||||||
|
|
||||||
## Example Apps
|
## Example Apps
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 4
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
The ability to perceive the shape and motion of hands can be a vital component
|
The ability to perceive the shape and motion of hands can be a vital component
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 6
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
Live perception of simultaneous [human pose](./pose.md),
|
Live perception of simultaneous [human pose](./pose.md),
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 11
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
Augmented Reality (AR) technology creates fun, engaging, and immersive user
|
Augmented Reality (AR) technology creates fun, engaging, and immersive user
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 3
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
A wide range of real-world applications, including computational photography
|
A wide range of real-world applications, including computational photography
|
||||||
|
@ -38,7 +48,7 @@ camera, in real-time, without the need for specialized hardware. Through use of
|
||||||
iris landmarks, the solution is also able to determine the metric distance
|
iris landmarks, the solution is also able to determine the metric distance
|
||||||
between the subject and the camera with relative error less than 10%. Note that
|
between the subject and the camera with relative error less than 10%. Note that
|
||||||
iris tracking does not infer the location at which people are looking, nor does
|
iris tracking does not infer the location at which people are looking, nor does
|
||||||
it provide any form of identity recognition. With the cross-platfrom capability
|
it provide any form of identity recognition. With the cross-platform capability
|
||||||
of the MediaPipe framework, MediaPipe Iris can run on most modern
|
of the MediaPipe framework, MediaPipe Iris can run on most modern
|
||||||
[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
|
[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
|
||||||
[web](#web).
|
[web](#web).
|
||||||
|
@ -99,7 +109,7 @@ You can also find more details in this
|
||||||
### Iris Landmark Model
|
### Iris Landmark Model
|
||||||
|
|
||||||
The iris model takes an image patch of the eye region and estimates both the eye
|
The iris model takes an image patch of the eye region and estimates both the eye
|
||||||
landmarks (along the eyelid) and iris landmarks (along ths iris contour). You
|
landmarks (along the eyelid) and iris landmarks (along the iris contour). You
|
||||||
can find more details in this [paper](https://arxiv.org/abs/2006.11341).
|
can find more details in this [paper](https://arxiv.org/abs/2006.11341).
|
||||||
|
|
||||||
![iris_tracking_eye_and_iris_landmarks.png](https://mediapipe.dev/images/mobile/iris_tracking_eye_and_iris_landmarks.png) |
|
![iris_tracking_eye_and_iris_landmarks.png](https://mediapipe.dev/images/mobile/iris_tracking_eye_and_iris_landmarks.png) |
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 13
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
MediaPipe KNIFT is a template-based feature matching solution using KNIFT
|
MediaPipe KNIFT is a template-based feature matching solution using KNIFT
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 15
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
MediaPipe is a useful and general framework for media processing that can
|
MediaPipe is a useful and general framework for media processing that can
|
||||||
|
@ -85,7 +95,7 @@ process new data sets, in the documentation of
|
||||||
|
|
||||||
MediaSequence uses SequenceExamples as the format of both inputs and
|
MediaSequence uses SequenceExamples as the format of both inputs and
|
||||||
outputs. Annotations are encoded as inputs in a SequenceExample of metadata
|
outputs. Annotations are encoded as inputs in a SequenceExample of metadata
|
||||||
that defines the labels and the path to the cooresponding video file. This
|
that defines the labels and the path to the corresponding video file. This
|
||||||
metadata is passed as input to the C++ `media_sequence_demo` binary, and the
|
metadata is passed as input to the C++ `media_sequence_demo` binary, and the
|
||||||
output is a SequenceExample filled with images and annotations ready for
|
output is a SequenceExample filled with images and annotations ready for
|
||||||
model training.
|
model training.
|
||||||
|
|
|
@ -12,6 +12,20 @@ nav_order: 30
|
||||||
{:toc}
|
{:toc}
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for
|
||||||
|
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
as of March 1, 2023. All other
|
||||||
|
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
to a new MediaPipe Solution. The code repository and prebuilt binaries for all
|
||||||
|
MediaPipe Legacy Solutions will continue to be provided on an as-is basis.
|
||||||
|
We encourage you to check out the new MediaPipe Solutions at:
|
||||||
|
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)
|
### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)
|
||||||
|
|
||||||
* Short-range model (best for faces within 2 meters from the camera):
|
* Short-range model (best for faces within 2 meters from the camera):
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 9
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
![object_detection_android_gpu.gif](https://mediapipe.dev/images/mobile/object_detection_android_gpu.gif)
|
![object_detection_android_gpu.gif](https://mediapipe.dev/images/mobile/object_detection_android_gpu.gif)
|
||||||
|
|
||||||
## Example Apps
|
## Example Apps
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 12
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
MediaPipe Objectron is a mobile real-time 3D object detection solution for
|
MediaPipe Objectron is a mobile real-time 3D object detection solution for
|
||||||
|
@ -170,7 +180,7 @@ and a
|
||||||
The detection subgraph performs ML inference only once every few frames to
|
The detection subgraph performs ML inference only once every few frames to
|
||||||
reduce computation load, and decodes the output tensor to a FrameAnnotation that
|
reduce computation load, and decodes the output tensor to a FrameAnnotation that
|
||||||
contains nine keypoints: the 3D bounding box's center and its eight vertices.
|
contains nine keypoints: the 3D bounding box's center and its eight vertices.
|
||||||
The tracking subgraph runs every frame, using the box traker in
|
The tracking subgraph runs every frame, using the box tracker in
|
||||||
[MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly
|
[MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly
|
||||||
enclosing the projection of the 3D bounding box, and lifts the tracked 2D
|
enclosing the projection of the 3D bounding box, and lifts the tracked 2D
|
||||||
keypoints to 3D with
|
keypoints to 3D with
|
||||||
|
@ -613,7 +623,7 @@ z_ndc = 1 / Z
|
||||||
|
|
||||||
### Pixel Space
|
### Pixel Space
|
||||||
|
|
||||||
In this API we set upper-left coner of an image as the origin of pixel
|
In this API we set upper-left corner of an image as the origin of pixel
|
||||||
coordinate. One can convert from NDC to pixel space as follows:
|
coordinate. One can convert from NDC to pixel space as follows:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
|
@ -20,6 +20,16 @@ nav_order: 5
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
Human pose estimation from video plays a critical role in various applications
|
Human pose estimation from video plays a critical role in various applications
|
||||||
|
|
|
@ -19,6 +19,16 @@ nav_order: 1
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
One of the applications
|
One of the applications
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 7
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
|
||||||
|
Solution. For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
*Fig 1. Example of MediaPipe Selfie Segmentation.* |
|
*Fig 1. Example of MediaPipe Selfie Segmentation.* |
|
||||||
|
|
|
@ -13,7 +13,21 @@ has_toc: false
|
||||||
{:toc}
|
{:toc}
|
||||||
---
|
---
|
||||||
|
|
||||||
Note: These solutions are no longer actively maintained. Consider using or migrating to the new [MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide).
|
**Attention:** *Thank you for your interest in MediaPipe Solutions. We have
|
||||||
|
ended support for
|
||||||
|
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
as of March 1, 2023. All other
|
||||||
|
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
to a new MediaPipe Solution. The
|
||||||
|
[code repository](https://github.com/google/mediapipe/tree/master/mediapipe)
|
||||||
|
and prebuilt binaries for all MediaPipe Legacy Solutions will continue to
|
||||||
|
be provided on an as-is basis. We encourage you to check out the new MediaPipe
|
||||||
|
Solutions at:
|
||||||
|
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on June 1, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
MediaPipe offers open source cross-platform, customizable ML solutions for live
|
MediaPipe offers open source cross-platform, customizable ML solutions for live
|
||||||
and streaming media.
|
and streaming media.
|
||||||
|
|
|
@ -18,6 +18,16 @@ nav_order: 16
|
||||||
</details>
|
</details>
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Attention:** *Thank you for your interest in MediaPipe Solutions.
|
||||||
|
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
|
||||||
|
For more information, see the new
|
||||||
|
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
|
||||||
|
site.*
|
||||||
|
|
||||||
|
*This notice and web page will be removed on April 3, 2023.*
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
MediaPipe is a useful and general framework for media processing that can assist
|
MediaPipe is a useful and general framework for media processing that can assist
|
||||||
with research, development, and deployment of ML models. This example focuses on
|
with research, development, and deployment of ML models. This example focuses on
|
||||||
model development by demonstrating how to prepare training data and do model
|
model development by demonstrating how to prepare training data and do model
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
---
|
---
|
||||||
layout: default
|
layout: forward
|
||||||
|
target: https://developers.google.com/mediapipe/framework/tools/visualizer
|
||||||
title: Visualizer
|
title: Visualizer
|
||||||
parent: Tools
|
parent: Tools
|
||||||
nav_order: 1
|
nav_order: 1
|
||||||
|
|
|
@ -48,7 +48,6 @@ class MergeToVectorCalculator : public Node {
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status Process(CalculatorContext* cc) {
|
absl::Status Process(CalculatorContext* cc) {
|
||||||
const int input_num = kIn(cc).Count();
|
|
||||||
std::vector<T> output_vector;
|
std::vector<T> output_vector;
|
||||||
for (auto it = kIn(cc).begin(); it != kIn(cc).end(); it++) {
|
for (auto it = kIn(cc).begin(); it != kIn(cc).end(); it++) {
|
||||||
const auto& elem = *it;
|
const auto& elem = *it;
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||||
load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type")
|
load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type")
|
||||||
|
|
||||||
|
@ -23,6 +24,14 @@ package(
|
||||||
|
|
||||||
licenses(["notice"])
|
licenses(["notice"])
|
||||||
|
|
||||||
|
selects.config_setting_group(
|
||||||
|
name = "ios_or_disable_gpu",
|
||||||
|
match_any = [
|
||||||
|
"//mediapipe/gpu:disable_gpu",
|
||||||
|
"//mediapipe:ios",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
mediapipe_proto_library(
|
mediapipe_proto_library(
|
||||||
name = "detection_proto",
|
name = "detection_proto",
|
||||||
srcs = ["detection.proto"],
|
srcs = ["detection.proto"],
|
||||||
|
@ -336,9 +345,7 @@ cc_library(
|
||||||
"//conditions:default": [
|
"//conditions:default": [
|
||||||
"//mediapipe/gpu:gl_texture_buffer",
|
"//mediapipe/gpu:gl_texture_buffer",
|
||||||
],
|
],
|
||||||
"//mediapipe:ios": [
|
"ios_or_disable_gpu": [],
|
||||||
],
|
|
||||||
"//mediapipe/gpu:disable_gpu": [],
|
|
||||||
}) + select({
|
}) + select({
|
||||||
"//conditions:default": [],
|
"//conditions:default": [],
|
||||||
"//mediapipe:apple": [
|
"//mediapipe:apple": [
|
||||||
|
|
|
@ -18,15 +18,16 @@
|
||||||
|
|
||||||
#include "absl/strings/str_cat.h"
|
#include "absl/strings/str_cat.h"
|
||||||
#include "absl/strings/str_join.h"
|
#include "absl/strings/str_join.h"
|
||||||
|
#include "absl/strings/string_view.h"
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
namespace tool {
|
namespace tool {
|
||||||
|
|
||||||
absl::Status StatusInvalid(const std::string& message) {
|
absl::Status StatusInvalid(absl::string_view message) {
|
||||||
return absl::Status(absl::StatusCode::kInvalidArgument, message);
|
return absl::Status(absl::StatusCode::kInvalidArgument, message);
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status StatusFail(const std::string& message) {
|
absl::Status StatusFail(absl::string_view message) {
|
||||||
return absl::Status(absl::StatusCode::kUnknown, message);
|
return absl::Status(absl::StatusCode::kUnknown, message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,12 +36,12 @@ absl::Status StatusStop() {
|
||||||
"mediapipe::tool::StatusStop()");
|
"mediapipe::tool::StatusStop()");
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status AddStatusPrefix(const std::string& prefix,
|
absl::Status AddStatusPrefix(absl::string_view prefix,
|
||||||
const absl::Status& status) {
|
const absl::Status& status) {
|
||||||
return absl::Status(status.code(), absl::StrCat(prefix, status.message()));
|
return absl::Status(status.code(), absl::StrCat(prefix, status.message()));
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status CombinedStatus(const std::string& general_comment,
|
absl::Status CombinedStatus(absl::string_view general_comment,
|
||||||
const std::vector<absl::Status>& statuses) {
|
const std::vector<absl::Status>& statuses) {
|
||||||
// The final error code is absl::StatusCode::kUnknown if not all
|
// The final error code is absl::StatusCode::kUnknown if not all
|
||||||
// the error codes are the same. Otherwise it is the same error code
|
// the error codes are the same. Otherwise it is the same error code
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "absl/base/macros.h"
|
#include "absl/base/macros.h"
|
||||||
|
#include "absl/strings/string_view.h"
|
||||||
#include "mediapipe/framework/port/status.h"
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
|
@ -34,16 +35,16 @@ absl::Status StatusStop();
|
||||||
// Return a status which signals an invalid initial condition (for
|
// Return a status which signals an invalid initial condition (for
|
||||||
// example an InputSidePacket does not include all necessary fields).
|
// example an InputSidePacket does not include all necessary fields).
|
||||||
ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.")
|
ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.")
|
||||||
absl::Status StatusInvalid(const std::string& error_message);
|
absl::Status StatusInvalid(absl::string_view error_message);
|
||||||
|
|
||||||
// Return a status which signals that something unexpectedly failed.
|
// Return a status which signals that something unexpectedly failed.
|
||||||
ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.")
|
ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.")
|
||||||
absl::Status StatusFail(const std::string& error_message);
|
absl::Status StatusFail(absl::string_view error_message);
|
||||||
|
|
||||||
// Prefixes the given string to the error message in status.
|
// Prefixes the given string to the error message in status.
|
||||||
// This function should be considered internal to the framework.
|
// This function should be considered internal to the framework.
|
||||||
// TODO Replace usage of AddStatusPrefix with util::Annotate().
|
// TODO Replace usage of AddStatusPrefix with util::Annotate().
|
||||||
absl::Status AddStatusPrefix(const std::string& prefix,
|
absl::Status AddStatusPrefix(absl::string_view prefix,
|
||||||
const absl::Status& status);
|
const absl::Status& status);
|
||||||
|
|
||||||
// Combine a vector of absl::Status into a single composite status.
|
// Combine a vector of absl::Status into a single composite status.
|
||||||
|
@ -51,7 +52,7 @@ absl::Status AddStatusPrefix(const std::string& prefix,
|
||||||
// will be returned.
|
// will be returned.
|
||||||
// This function should be considered internal to the framework.
|
// This function should be considered internal to the framework.
|
||||||
// TODO Move this function to somewhere with less visibility.
|
// TODO Move this function to somewhere with less visibility.
|
||||||
absl::Status CombinedStatus(const std::string& general_comment,
|
absl::Status CombinedStatus(absl::string_view general_comment,
|
||||||
const std::vector<absl::Status>& statuses);
|
const std::vector<absl::Status>& statuses);
|
||||||
|
|
||||||
} // namespace tool
|
} // namespace tool
|
||||||
|
|
|
@ -15,7 +15,9 @@
|
||||||
package com.google.mediapipe.components;
|
package com.google.mediapipe.components;
|
||||||
|
|
||||||
import static java.lang.Math.max;
|
import static java.lang.Math.max;
|
||||||
|
import static java.lang.Math.min;
|
||||||
|
|
||||||
|
import android.graphics.Bitmap;
|
||||||
import android.graphics.SurfaceTexture;
|
import android.graphics.SurfaceTexture;
|
||||||
import android.opengl.GLES11Ext;
|
import android.opengl.GLES11Ext;
|
||||||
import android.opengl.GLES20;
|
import android.opengl.GLES20;
|
||||||
|
@ -25,9 +27,12 @@ import android.util.Log;
|
||||||
import com.google.mediapipe.framework.TextureFrame;
|
import com.google.mediapipe.framework.TextureFrame;
|
||||||
import com.google.mediapipe.glutil.CommonShaders;
|
import com.google.mediapipe.glutil.CommonShaders;
|
||||||
import com.google.mediapipe.glutil.ShaderUtil;
|
import com.google.mediapipe.glutil.ShaderUtil;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
import java.nio.FloatBuffer;
|
import java.nio.FloatBuffer;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
import javax.microedition.khronos.egl.EGLConfig;
|
import javax.microedition.khronos.egl.EGLConfig;
|
||||||
import javax.microedition.khronos.opengles.GL10;
|
import javax.microedition.khronos.opengles.GL10;
|
||||||
|
@ -44,6 +49,13 @@ import javax.microedition.khronos.opengles.GL10;
|
||||||
* {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}).
|
* {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}).
|
||||||
*/
|
*/
|
||||||
public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
||||||
|
/**
|
||||||
|
* Listener for Bitmap capture requests.
|
||||||
|
*/
|
||||||
|
public interface BitmapCaptureListener {
|
||||||
|
void onBitmapCaptured(Bitmap result);
|
||||||
|
}
|
||||||
|
|
||||||
private static final String TAG = "DemoRenderer";
|
private static final String TAG = "DemoRenderer";
|
||||||
private static final int ATTRIB_POSITION = 1;
|
private static final int ATTRIB_POSITION = 1;
|
||||||
private static final int ATTRIB_TEXTURE_COORDINATE = 2;
|
private static final int ATTRIB_TEXTURE_COORDINATE = 2;
|
||||||
|
@ -56,12 +68,32 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
||||||
private int frameUniform;
|
private int frameUniform;
|
||||||
private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES;
|
private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES;
|
||||||
private int textureTransformUniform;
|
private int textureTransformUniform;
|
||||||
|
private boolean shouldFitToWidth = false;
|
||||||
// Controls the alignment between frame size and surface size, 0.5f default is centered.
|
// Controls the alignment between frame size and surface size, 0.5f default is centered.
|
||||||
private float alignmentHorizontal = 0.5f;
|
private float alignmentHorizontal = 0.5f;
|
||||||
private float alignmentVertical = 0.5f;
|
private float alignmentVertical = 0.5f;
|
||||||
private float[] textureTransformMatrix = new float[16];
|
private float[] textureTransformMatrix = new float[16];
|
||||||
private SurfaceTexture surfaceTexture = null;
|
private SurfaceTexture surfaceTexture = null;
|
||||||
private final AtomicReference<TextureFrame> nextFrame = new AtomicReference<>();
|
private final AtomicReference<TextureFrame> nextFrame = new AtomicReference<>();
|
||||||
|
private final AtomicBoolean captureNextFrameBitmap = new AtomicBoolean();
|
||||||
|
private BitmapCaptureListener bitmapCaptureListener;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the {@link BitmapCaptureListener}.
|
||||||
|
*/
|
||||||
|
public void setBitmapCaptureListener(BitmapCaptureListener bitmapCaptureListener) {
|
||||||
|
this.bitmapCaptureListener = bitmapCaptureListener;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Requests a Bitmap capture of the next frame.
|
||||||
|
*
|
||||||
|
* <p>The result will be provided to the {@link BitmapCaptureListener} if one is set; if none is
|
||||||
|
* set, the request is dropped. This is an expensive operation and the result may take a while.
|
||||||
|
*/
|
||||||
|
public void captureNextFrameBitmap() {
|
||||||
|
captureNextFrameBitmap.set(true);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onSurfaceCreated(GL10 gl, EGLConfig config) {
|
public void onSurfaceCreated(GL10 gl, EGLConfig config) {
|
||||||
|
@ -147,6 +179,31 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
||||||
|
|
||||||
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
|
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
|
||||||
ShaderUtil.checkGlError("glDrawArrays");
|
ShaderUtil.checkGlError("glDrawArrays");
|
||||||
|
|
||||||
|
// Capture Bitmap if requested.
|
||||||
|
BitmapCaptureListener bitmapCaptureListener = this.bitmapCaptureListener;
|
||||||
|
if (captureNextFrameBitmap.getAndSet(false) && bitmapCaptureListener != null) {
|
||||||
|
int bitmapSize = surfaceWidth * surfaceHeight;
|
||||||
|
ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bitmapSize * 4);
|
||||||
|
byteBuffer.order(ByteOrder.nativeOrder());
|
||||||
|
GLES20.glReadPixels(
|
||||||
|
0, 0, surfaceWidth, surfaceHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, byteBuffer);
|
||||||
|
int[] pixelBuffer = new int[bitmapSize];
|
||||||
|
byteBuffer.asIntBuffer().get(pixelBuffer);
|
||||||
|
for (int i = 0; i < bitmapSize; i++) {
|
||||||
|
// Swap R and B channels.
|
||||||
|
pixelBuffer[i] =
|
||||||
|
(pixelBuffer[i] & 0xff00ff00)
|
||||||
|
| ((pixelBuffer[i] & 0x000000ff) << 16)
|
||||||
|
| ((pixelBuffer[i] & 0x00ff0000) >> 16);
|
||||||
|
}
|
||||||
|
Bitmap bitmap = Bitmap.createBitmap(surfaceWidth, surfaceHeight, Bitmap.Config.ARGB_8888);
|
||||||
|
bitmap.setPixels(
|
||||||
|
pixelBuffer, /* offset= */bitmapSize - surfaceWidth, /* stride= */-surfaceWidth,
|
||||||
|
/* x= */0, /* y= */0, surfaceWidth, surfaceHeight);
|
||||||
|
bitmapCaptureListener.onBitmapCaptured(bitmap);
|
||||||
|
}
|
||||||
|
|
||||||
GLES20.glBindTexture(textureTarget, 0);
|
GLES20.glBindTexture(textureTarget, 0);
|
||||||
ShaderUtil.checkGlError("unbind surfaceTexture");
|
ShaderUtil.checkGlError("unbind surfaceTexture");
|
||||||
|
|
||||||
|
@ -158,13 +215,17 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
||||||
// TODO: compute scale from surfaceTexture size.
|
// TODO: compute scale from surfaceTexture size.
|
||||||
float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f;
|
float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f;
|
||||||
float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f;
|
float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f;
|
||||||
// Whichever of the two scales is greater corresponds to the dimension where the image
|
// By default whichever of the two scales is greater corresponds to the dimension where the
|
||||||
// is proportionally smaller than the view. Dividing both scales by that number results
|
// image is proportionally smaller than the view. Dividing both scales by that number results
|
||||||
// in that dimension having scale 1.0, and thus touching the edges of the view, while the
|
// in that dimension having scale 1.0, and thus touching the edges of the view, while the
|
||||||
// other is cropped proportionally.
|
// other is cropped proportionally. If shouldFitToWidth is set to true, use the min scale
|
||||||
float maxScale = max(scaleWidth, scaleHeight);
|
// if the frame width is greater than the frame height.
|
||||||
scaleWidth /= maxScale;
|
float scale = max(scaleWidth, scaleHeight);
|
||||||
scaleHeight /= maxScale;
|
if (shouldFitToWidth && (frameWidth > frameHeight)) {
|
||||||
|
scale = min(scaleWidth, scaleHeight);
|
||||||
|
}
|
||||||
|
scaleWidth /= scale;
|
||||||
|
scaleHeight /= scale;
|
||||||
|
|
||||||
// Alignment controls where the visible section is placed within the full camera frame, with
|
// Alignment controls where the visible section is placed within the full camera frame, with
|
||||||
// (0, 0) being the bottom left, and (1, 1) being the top right.
|
// (0, 0) being the bottom left, and (1, 1) being the top right.
|
||||||
|
@ -232,6 +293,11 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
|
||||||
frameHeight = height;
|
frameHeight = height;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Enables fit-to-width scaling, applied only when the frame is wider than it is tall. */
|
||||||
|
public void setShouldFitToWidth(boolean shouldFitToWidth) {
|
||||||
|
this.shouldFitToWidth = shouldFitToWidth;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When the aspect ratios between the camera frame and the surface size are mismatched, this
|
* When the aspect ratios between the camera frame and the surface size are mismatched, this
|
||||||
* controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning
|
* controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning
|
||||||
|
|
|
@ -35,7 +35,6 @@ cc_library(
|
||||||
"//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto",
|
"//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto",
|
||||||
"//mediapipe/tasks/cc/components/processors:embedder_options",
|
"//mediapipe/tasks/cc/components/processors:embedder_options",
|
||||||
"//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto",
|
"//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto",
|
||||||
"//mediapipe/tasks/cc/components/utils:cosine_similarity",
|
|
||||||
"//mediapipe/tasks/cc/core:base_options",
|
"//mediapipe/tasks/cc/core:base_options",
|
||||||
"//mediapipe/tasks/cc/core:task_runner",
|
"//mediapipe/tasks/cc/core:task_runner",
|
||||||
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
"//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",
|
||||||
|
|
|
@ -29,7 +29,6 @@ limitations under the License.
|
||||||
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
|
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
|
||||||
#include "mediapipe/tasks/cc/components/processors/embedder_options.h"
|
#include "mediapipe/tasks/cc/components/processors/embedder_options.h"
|
||||||
#include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h"
|
#include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h"
|
||||||
#include "mediapipe/tasks/cc/components/utils/cosine_similarity.h"
|
|
||||||
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
|
||||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||||
|
@ -147,10 +146,4 @@ absl::Status AudioEmbedder::EmbedAsync(Matrix audio_block,
|
||||||
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::StatusOr<double> AudioEmbedder::CosineSimilarity(
|
|
||||||
const components::containers::Embedding& u,
|
|
||||||
const components::containers::Embedding& v) {
|
|
||||||
return components::utils::CosineSimilarity(u, v);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mediapipe::tasks::audio::audio_embedder
|
} // namespace mediapipe::tasks::audio::audio_embedder
|
||||||
|
|
|
@ -125,16 +125,6 @@ class AudioEmbedder : core::BaseAudioTaskApi {
|
||||||
|
|
||||||
// Shuts down the AudioEmbedder once all work is done.
|
// Shuts down the AudioEmbedder once all work is done.
|
||||||
absl::Status Close() { return runner_->Close(); }
|
absl::Status Close() { return runner_->Close(); }
|
||||||
|
|
||||||
// Utility function to compute cosine similarity [1] between two embeddings.
|
|
||||||
// May return an InvalidArgumentError if e.g. the embeddings are of different
|
|
||||||
// types (quantized vs. float), have different sizes, or have a an L2-norm of
|
|
||||||
// 0.
|
|
||||||
//
|
|
||||||
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
|
|
||||||
static absl::StatusOr<double> CosineSimilarity(
|
|
||||||
const components::containers::Embedding& u,
|
|
||||||
const components::containers::Embedding& v);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace mediapipe::tasks::audio::audio_embedder
|
} // namespace mediapipe::tasks::audio::audio_embedder
|
||||||
|
|
|
@ -54,8 +54,6 @@ constexpr char kModelWithMetadata[] = "yamnet_embedding_metadata.tflite";
|
||||||
constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav";
|
constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav";
|
||||||
constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav";
|
constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav";
|
||||||
constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav";
|
constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav";
|
||||||
constexpr float kSpeechSimilarities[] = {0.985359, 0.994349, 0.993227, 0.996658,
|
|
||||||
0.996384};
|
|
||||||
constexpr int kMilliSecondsPerSecond = 1000;
|
constexpr int kMilliSecondsPerSecond = 1000;
|
||||||
constexpr int kYamnetNumOfAudioSamples = 15600;
|
constexpr int kYamnetNumOfAudioSamples = 15600;
|
||||||
constexpr int kYamnetAudioSampleRate = 16000;
|
constexpr int kYamnetAudioSampleRate = 16000;
|
||||||
|
@ -163,15 +161,9 @@ TEST_F(EmbedTest, SucceedsWithSameAudioAtDifferentSampleRates) {
|
||||||
audio_embedder->Embed(audio_buffer1, 16000));
|
audio_embedder->Embed(audio_buffer1, 16000));
|
||||||
MP_ASSERT_OK_AND_ASSIGN(auto result2,
|
MP_ASSERT_OK_AND_ASSIGN(auto result2,
|
||||||
audio_embedder->Embed(audio_buffer2, 48000));
|
audio_embedder->Embed(audio_buffer2, 48000));
|
||||||
int expected_size = sizeof(kSpeechSimilarities) / sizeof(float);
|
int expected_size = 5;
|
||||||
ASSERT_EQ(result1.size(), expected_size);
|
ASSERT_EQ(result1.size(), expected_size);
|
||||||
ASSERT_EQ(result2.size(), expected_size);
|
ASSERT_EQ(result2.size(), expected_size);
|
||||||
for (int i = 0; i < expected_size; ++i) {
|
|
||||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
|
||||||
result1[i].embeddings[0],
|
|
||||||
result2[i].embeddings[0]));
|
|
||||||
EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
|
|
||||||
}
|
|
||||||
MP_EXPECT_OK(audio_embedder->Close());
|
MP_EXPECT_OK(audio_embedder->Close());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,10 +184,6 @@ TEST_F(EmbedTest, SucceedsWithDifferentAudios) {
|
||||||
audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate));
|
audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate));
|
||||||
ASSERT_EQ(result1.size(), 5);
|
ASSERT_EQ(result1.size(), 5);
|
||||||
ASSERT_EQ(result2.size(), 1);
|
ASSERT_EQ(result2.size(), 1);
|
||||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
|
||||||
result1[0].embeddings[0],
|
|
||||||
result2[0].embeddings[0]));
|
|
||||||
EXPECT_NEAR(similarity, 0.09017f, 1e-6);
|
|
||||||
MP_EXPECT_OK(audio_embedder->Close());
|
MP_EXPECT_OK(audio_embedder->Close());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,15 +246,9 @@ TEST_F(EmbedAsyncTest, SucceedsWithSameAudioAtDifferentSampleRates) {
|
||||||
RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1);
|
RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1);
|
||||||
std::vector<AudioEmbedderResult> result2;
|
std::vector<AudioEmbedderResult> result2;
|
||||||
RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2);
|
RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2);
|
||||||
int expected_size = sizeof(kSpeechSimilarities) / sizeof(float);
|
int expected_size = 5;
|
||||||
ASSERT_EQ(result1.size(), expected_size);
|
ASSERT_EQ(result1.size(), expected_size);
|
||||||
ASSERT_EQ(result2.size(), expected_size);
|
ASSERT_EQ(result2.size(), expected_size);
|
||||||
for (int i = 0; i < expected_size; ++i) {
|
|
||||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
|
||||||
result1[i].embeddings[0],
|
|
||||||
result2[i].embeddings[0]));
|
|
||||||
EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
|
TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
|
||||||
|
@ -276,10 +258,6 @@ TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
|
||||||
RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2);
|
RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2);
|
||||||
ASSERT_EQ(result1.size(), 5);
|
ASSERT_EQ(result1.size(), 5);
|
||||||
ASSERT_EQ(result2.size(), 1);
|
ASSERT_EQ(result2.size(), 1);
|
||||||
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
|
|
||||||
result1[0].embeddings[0],
|
|
||||||
result2[0].embeddings[0]));
|
|
||||||
EXPECT_NEAR(similarity, 0.09017f, 1e-6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -185,15 +185,15 @@ TEST_P(CalibrationWithoutIndicesTest, Succeeds) {
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(
|
INSTANTIATE_TEST_SUITE_P(
|
||||||
ScoreCalibrationCalculatorTest, CalibrationWithoutIndicesTest,
|
ScoreCalibrationCalculatorTest, CalibrationWithoutIndicesTest,
|
||||||
Values(CalibrationTestParams{.score_transformation = "IDENTITY",
|
Values(CalibrationTestParams{
|
||||||
.expected_results = {0.4948505976,
|
/* score_transformation= */ "IDENTITY",
|
||||||
0.5059588508, 0.2, 0.2}},
|
/* expected_results= */ {0.4948505976, 0.5059588508, 0.2, 0.2}},
|
||||||
CalibrationTestParams{
|
CalibrationTestParams{
|
||||||
.score_transformation = "LOG",
|
/* score_transformation= */ "LOG",
|
||||||
.expected_results = {0.2976901255, 0.3393665735, 0.2, 0.2}},
|
/* expected_results= */ {0.2976901255, 0.3393665735, 0.2, 0.2}},
|
||||||
CalibrationTestParams{
|
CalibrationTestParams{
|
||||||
.score_transformation = "INVERSE_LOGISTIC",
|
/* score_transformation= */ "INVERSE_LOGISTIC",
|
||||||
.expected_results = {0.3203217641, 0.3778080605, 0.2, 0.2}}),
|
/* expected_results= */ {0.3203217641, 0.3778080605, 0.2, 0.2}}),
|
||||||
[](const TestParamInfo<CalibrationWithoutIndicesTest::ParamType>& info) {
|
[](const TestParamInfo<CalibrationWithoutIndicesTest::ParamType>& info) {
|
||||||
return info.param.score_transformation;
|
return info.param.score_transformation;
|
||||||
});
|
});
|
||||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
||||||
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARK_H_
|
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARK_H_
|
||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
|
@ -332,9 +332,11 @@ cc_library(
|
||||||
"//mediapipe/tasks:internal",
|
"//mediapipe/tasks:internal",
|
||||||
],
|
],
|
||||||
deps = [
|
deps = [
|
||||||
|
":external_file_handler",
|
||||||
"//mediapipe/calculators/core:flow_limiter_calculator_cc_proto",
|
"//mediapipe/calculators/core:flow_limiter_calculator_cc_proto",
|
||||||
"//mediapipe/framework:calculator_cc_proto",
|
"//mediapipe/framework:calculator_cc_proto",
|
||||||
"//mediapipe/framework/api2:builder",
|
"//mediapipe/framework/api2:builder",
|
||||||
|
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
|
||||||
"//mediapipe/tasks/metadata:metadata_schema_cc",
|
"//mediapipe/tasks/metadata:metadata_schema_cc",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
"@flatbuffers//:runtime_cc",
|
"@flatbuffers//:runtime_cc",
|
||||||
|
@ -375,6 +377,5 @@ cc_test(
|
||||||
"//mediapipe/tasks/cc:common",
|
"//mediapipe/tasks/cc:common",
|
||||||
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
|
"//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
|
||||||
"//mediapipe/tasks/cc/metadata/utils:zip_utils",
|
"//mediapipe/tasks/cc/metadata/utils:zip_utils",
|
||||||
"@org_tensorflow//tensorflow/lite/c:common",
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
@ -29,7 +29,7 @@ limitations under the License.
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#else
|
#else
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif // _WIN32
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -102,9 +102,13 @@ absl::StatusOr<std::string> PathToResourceAsFile(std::string path) {
|
||||||
#else
|
#else
|
||||||
if (absl::StartsWith(path, "./")) {
|
if (absl::StartsWith(path, "./")) {
|
||||||
path = "mediapipe" + path.substr(1);
|
path = "mediapipe" + path.substr(1);
|
||||||
|
} else if (path[0] != '/') {
|
||||||
|
path = "mediapipe/" + path;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string error;
|
std::string error;
|
||||||
|
// TODO: We should ideally use `CreateForTests` when this is
|
||||||
|
// accessed from unit tests.
|
||||||
std::unique_ptr<::bazel::tools::cpp::runfiles::Runfiles> runfiles(
|
std::unique_ptr<::bazel::tools::cpp::runfiles::Runfiles> runfiles(
|
||||||
::bazel::tools::cpp::runfiles::Runfiles::Create("", &error));
|
::bazel::tools::cpp::runfiles::Runfiles::Create("", &error));
|
||||||
if (!runfiles) {
|
if (!runfiles) {
|
||||||
|
|
|
@ -88,6 +88,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFile) {
|
||||||
.status());
|
.status());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
|
TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
|
||||||
const int model_file_descriptor = open(kTestModelBundlePath, O_RDONLY);
|
const int model_file_descriptor = open(kTestModelBundlePath, O_RDONLY);
|
||||||
auto model_file = std::make_unique<proto::ExternalFile>();
|
auto model_file = std::make_unique<proto::ExternalFile>();
|
||||||
|
@ -103,6 +104,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
|
||||||
model_bundle_resources->GetModelFile("dummy_gesture_recognizer.tflite")
|
model_bundle_resources->GetModelFile("dummy_gesture_recognizer.tflite")
|
||||||
.status());
|
.status());
|
||||||
}
|
}
|
||||||
|
#endif // _WIN32
|
||||||
|
|
||||||
TEST(ModelAssetBundleResourcesTest, CreateFromFilePointer) {
|
TEST(ModelAssetBundleResourcesTest, CreateFromFilePointer) {
|
||||||
auto file_content = LoadBinaryContent(kTestModelBundlePath);
|
auto file_content = LoadBinaryContent(kTestModelBundlePath);
|
||||||
|
|
|
@ -136,6 +136,7 @@ TEST_F(ModelResourcesTest, CreateFromFile) {
|
||||||
CheckModelResourcesPackets(model_resources.get());
|
CheckModelResourcesPackets(model_resources.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
|
TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
|
||||||
const int model_file_descriptor = open(kTestModelPath, O_RDONLY);
|
const int model_file_descriptor = open(kTestModelPath, O_RDONLY);
|
||||||
auto model_file = std::make_unique<proto::ExternalFile>();
|
auto model_file = std::make_unique<proto::ExternalFile>();
|
||||||
|
@ -145,6 +146,7 @@ TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
|
||||||
ModelResources::Create(kTestModelResourcesTag, std::move(model_file)));
|
ModelResources::Create(kTestModelResourcesTag, std::move(model_file)));
|
||||||
CheckModelResourcesPackets(model_resources.get());
|
CheckModelResourcesPackets(model_resources.get());
|
||||||
}
|
}
|
||||||
|
#endif // _WIN32
|
||||||
|
|
||||||
TEST_F(ModelResourcesTest, CreateFromInvalidFile) {
|
TEST_F(ModelResourcesTest, CreateFromInvalidFile) {
|
||||||
auto model_file = std::make_unique<proto::ExternalFile>();
|
auto model_file = std::make_unique<proto::ExternalFile>();
|
||||||
|
@ -168,6 +170,15 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
|
||||||
auto status_or_model_resources =
|
auto status_or_model_resources =
|
||||||
ModelResources::Create(kTestModelResourcesTag, std::move(model_file));
|
ModelResources::Create(kTestModelResourcesTag, std::move(model_file));
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
EXPECT_EQ(status_or_model_resources.status().code(),
|
||||||
|
absl::StatusCode::kFailedPrecondition);
|
||||||
|
EXPECT_THAT(
|
||||||
|
status_or_model_resources.status().message(),
|
||||||
|
testing::HasSubstr("File descriptors are not supported on Windows."));
|
||||||
|
AssertStatusHasMediaPipeTasksStatusCode(status_or_model_resources.status(),
|
||||||
|
MediaPipeTasksStatus::kFileReadError);
|
||||||
|
#else
|
||||||
EXPECT_EQ(status_or_model_resources.status().code(),
|
EXPECT_EQ(status_or_model_resources.status().code(),
|
||||||
absl::StatusCode::kInvalidArgument);
|
absl::StatusCode::kInvalidArgument);
|
||||||
EXPECT_THAT(
|
EXPECT_THAT(
|
||||||
|
@ -176,6 +187,7 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
|
||||||
AssertStatusHasMediaPipeTasksStatusCode(
|
AssertStatusHasMediaPipeTasksStatusCode(
|
||||||
status_or_model_resources.status(),
|
status_or_model_resources.status(),
|
||||||
MediaPipeTasksStatus::kInvalidArgumentError);
|
MediaPipeTasksStatus::kInvalidArgumentError);
|
||||||
|
#endif // _WIN32
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(ModelResourcesTest, CreateFailWithCorruptedFile) {
|
TEST_F(ModelResourcesTest, CreateFailWithCorruptedFile) {
|
||||||
|
|
|
@ -23,6 +23,8 @@ limitations under the License.
|
||||||
#include "absl/strings/string_view.h"
|
#include "absl/strings/string_view.h"
|
||||||
#include "flatbuffers/flatbuffers.h"
|
#include "flatbuffers/flatbuffers.h"
|
||||||
#include "mediapipe/calculators/core/flow_limiter_calculator.pb.h"
|
#include "mediapipe/calculators/core/flow_limiter_calculator.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/external_file_handler.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
namespace tasks {
|
namespace tasks {
|
||||||
|
@ -34,13 +36,11 @@ constexpr char kFlowLimiterCalculatorName[] = "FlowLimiterCalculator";
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
std::string LoadBinaryContent(const char* filename) {
|
std::string LoadBinaryContent(const char* filename) {
|
||||||
std::ifstream input_file(filename, std::ios::binary | std::ios::ate);
|
proto::ExternalFile external_file;
|
||||||
// Find buffer size from input file, and load the buffer.
|
external_file.set_file_name(filename);
|
||||||
size_t buffer_size = input_file.tellg();
|
auto file_handler =
|
||||||
std::string buffer(buffer_size, '\0');
|
ExternalFileHandler::CreateFromExternalFile(&external_file);
|
||||||
input_file.seekg(0, std::ios::beg);
|
return std::string{(*file_handler)->GetFileContent()};
|
||||||
input_file.read(const_cast<char*>(buffer.c_str()), buffer_size);
|
|
||||||
return buffer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int FindTensorIndexByMetadataName(
|
int FindTensorIndexByMetadataName(
|
||||||
|
|
|
@ -16,6 +16,7 @@ cc_test(
|
||||||
"//mediapipe/framework/port:gtest_main",
|
"//mediapipe/framework/port:gtest_main",
|
||||||
"//mediapipe/framework/port:status",
|
"//mediapipe/framework/port:status",
|
||||||
"//mediapipe/tasks/cc:common",
|
"//mediapipe/tasks/cc:common",
|
||||||
|
"//mediapipe/tasks/cc/core:utils",
|
||||||
"//mediapipe/tasks/cc/metadata:metadata_extractor",
|
"//mediapipe/tasks/cc/metadata:metadata_extractor",
|
||||||
"@com_google_absl//absl/status",
|
"@com_google_absl//absl/status",
|
||||||
"@com_google_absl//absl/status:statusor",
|
"@com_google_absl//absl/status:statusor",
|
||||||
|
|
|
@ -25,12 +25,14 @@ limitations under the License.
|
||||||
#include "mediapipe/framework/port/status_macros.h"
|
#include "mediapipe/framework/port/status_macros.h"
|
||||||
#include "mediapipe/framework/port/status_matchers.h"
|
#include "mediapipe/framework/port/status_matchers.h"
|
||||||
#include "mediapipe/tasks/cc/common.h"
|
#include "mediapipe/tasks/cc/common.h"
|
||||||
|
#include "mediapipe/tasks/cc/core/utils.h"
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
namespace tasks {
|
namespace tasks {
|
||||||
namespace metadata {
|
namespace metadata {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
using core::LoadBinaryContent;
|
||||||
using ::testing::Optional;
|
using ::testing::Optional;
|
||||||
|
|
||||||
constexpr char kTestDataDirectory[] = "mediapipe/tasks/testdata/metadata";
|
constexpr char kTestDataDirectory[] = "mediapipe/tasks/testdata/metadata";
|
||||||
|
@ -53,8 +55,8 @@ constexpr char kRandomTextFile[] = "external_file";
|
||||||
|
|
||||||
absl::StatusOr<std::unique_ptr<ModelMetadataExtractor>> CreateMetadataExtractor(
|
absl::StatusOr<std::unique_ptr<ModelMetadataExtractor>> CreateMetadataExtractor(
|
||||||
std::string model_name, std::string* file_contents) {
|
std::string model_name, std::string* file_contents) {
|
||||||
MP_RETURN_IF_ERROR(file::GetContents(
|
*file_contents = LoadBinaryContent(
|
||||||
file::JoinPath("./", kTestDataDirectory, model_name), file_contents));
|
file::JoinPath("./", kTestDataDirectory, model_name).c_str());
|
||||||
return ModelMetadataExtractor::CreateFromModelBuffer(file_contents->data(),
|
return ModelMetadataExtractor::CreateFromModelBuffer(file_contents->data(),
|
||||||
file_contents->length());
|
file_contents->length());
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,7 +26,11 @@ using ::testing::MatchesRegex;
|
||||||
|
|
||||||
TEST(MetadataParserTest, MatadataParserVersionIsWellFormed) {
|
TEST(MetadataParserTest, MatadataParserVersionIsWellFormed) {
|
||||||
// Validates that the version is well-formed (x.y.z).
|
// Validates that the version is well-formed (x.y.z).
|
||||||
|
#ifdef _WIN32
|
||||||
|
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
|
||||||
|
#else
|
||||||
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
|
EXPECT_THAT(kMatadataParserVersion, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
|
||||||
|
#endif // _WIN32
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -83,7 +83,11 @@ TEST(MetadataVersionTest,
|
||||||
builder.GetSize(), &min_version),
|
builder.GetSize(), &min_version),
|
||||||
kTfLiteOk);
|
kTfLiteOk);
|
||||||
// Validates that the version is well-formed (x.y.z).
|
// Validates that the version is well-formed (x.y.z).
|
||||||
|
#ifdef _WIN32
|
||||||
|
EXPECT_THAT(min_version, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
|
||||||
|
#else
|
||||||
EXPECT_THAT(min_version, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
|
EXPECT_THAT(min_version, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
|
||||||
|
#endif // _WIN32
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(MetadataVersionTest,
|
TEST(MetadataVersionTest,
|
||||||
|
|
49
mediapipe/tasks/cc/vision/face_geometry/calculators/BUILD
Normal file
49
mediapipe/tasks/cc/vision/face_geometry/calculators/BUILD
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||||
|
|
||||||
|
mediapipe_proto_library(
|
||||||
|
name = "geometry_pipeline_calculator_proto",
|
||||||
|
srcs = ["geometry_pipeline_calculator.proto"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework:calculator_options_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "geometry_pipeline_calculator",
|
||||||
|
srcs = ["geometry_pipeline_calculator.cc"],
|
||||||
|
deps = [
|
||||||
|
":geometry_pipeline_calculator_cc_proto",
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||||
|
"//mediapipe/framework/port:logging",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/framework/port:statusor",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/libs:geometry_pipeline",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/libs:validation_utils",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
|
||||||
|
"//mediapipe/util:resource_util",
|
||||||
|
"@com_google_absl//absl/memory",
|
||||||
|
],
|
||||||
|
alwayslink = 1,
|
||||||
|
)
|
|
@ -0,0 +1,194 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/memory/memory.h"
|
||||||
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/framework/port/status_macros.h"
|
||||||
|
#include "mediapipe/framework/port/statusor.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||||
|
#include "mediapipe/util/resource_util.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
|
||||||
|
static constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||||
|
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
|
||||||
|
static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS";
|
||||||
|
|
||||||
|
using ::mediapipe::tasks::vision::face_geometry::proto::Environment;
|
||||||
|
using ::mediapipe::tasks::vision::face_geometry::proto::FaceGeometry;
|
||||||
|
using ::mediapipe::tasks::vision::face_geometry::proto::
|
||||||
|
GeometryPipelineMetadata;
|
||||||
|
|
||||||
|
// A calculator that estimates face geometry for multiple faces.
|
||||||
|
//
|
||||||
|
// Inputs:
|
||||||
|
// IMAGE_SIZE (`std::pair<int, int>`, required):
|
||||||
|
// The size of the current frame. The first element of the pair is the frame
|
||||||
|
// width; the other one is the frame height.
|
||||||
|
//
|
||||||
|
// The face landmarks should have been detected on a frame with the same
|
||||||
|
// ratio. If used as-is, the resulting face geometry visualization should be
|
||||||
|
// happening on a frame with the same ratio as well.
|
||||||
|
//
|
||||||
|
// MULTI_FACE_LANDMARKS (`std::vector<NormalizedLandmarkList>`, required):
|
||||||
|
// A vector of face landmark lists.
|
||||||
|
//
|
||||||
|
// Input side packets:
|
||||||
|
// ENVIRONMENT (`proto::Environment`, required)
|
||||||
|
// Describes an environment; includes the camera frame origin point location
|
||||||
|
// as well as virtual camera parameters.
|
||||||
|
//
|
||||||
|
// Output:
|
||||||
|
// MULTI_FACE_GEOMETRY (`std::vector<FaceGeometry>`, required):
|
||||||
|
// A vector of face geometry data.
|
||||||
|
//
|
||||||
|
// Options:
|
||||||
|
// metadata_path (`string`, optional):
|
||||||
|
// Defines a path for the geometry pipeline metadata file.
|
||||||
|
//
|
||||||
|
// The geometry pipeline metadata file format must be the binary
|
||||||
|
// `GeometryPipelineMetadata` proto.
|
||||||
|
//
|
||||||
|
class GeometryPipelineCalculator : public CalculatorBase {
|
||||||
|
public:
|
||||||
|
static absl::Status GetContract(CalculatorContract* cc) {
|
||||||
|
cc->InputSidePackets().Tag(kEnvironmentTag).Set<Environment>();
|
||||||
|
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
|
||||||
|
cc->Inputs()
|
||||||
|
.Tag(kMultiFaceLandmarksTag)
|
||||||
|
.Set<std::vector<mediapipe::NormalizedLandmarkList>>();
|
||||||
|
cc->Outputs().Tag(kMultiFaceGeometryTag).Set<std::vector<FaceGeometry>>();
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status Open(CalculatorContext* cc) override {
|
||||||
|
cc->SetOffset(mediapipe::TimestampDiff(0));
|
||||||
|
|
||||||
|
const auto& options = cc->Options<FaceGeometryPipelineCalculatorOptions>();
|
||||||
|
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
GeometryPipelineMetadata metadata,
|
||||||
|
ReadMetadataFromFile(options.metadata_path()),
|
||||||
|
_ << "Failed to read the geometry pipeline metadata from file!");
|
||||||
|
|
||||||
|
MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
|
||||||
|
<< "Invalid geometry pipeline metadata!";
|
||||||
|
|
||||||
|
const Environment& environment =
|
||||||
|
cc->InputSidePackets().Tag(kEnvironmentTag).Get<Environment>();
|
||||||
|
|
||||||
|
MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
|
||||||
|
<< "Invalid environment!";
|
||||||
|
|
||||||
|
ASSIGN_OR_RETURN(geometry_pipeline_,
|
||||||
|
CreateGeometryPipeline(environment, metadata),
|
||||||
|
_ << "Failed to create a geometry pipeline!");
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status Process(CalculatorContext* cc) override {
|
||||||
|
// Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required
|
||||||
|
// to have a non-empty packet. In case this requirement is not met, there's
|
||||||
|
// nothing to be processed at the current timestamp.
|
||||||
|
if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() ||
|
||||||
|
cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) {
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto& image_size =
|
||||||
|
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
|
||||||
|
const auto& multi_face_landmarks =
|
||||||
|
cc->Inputs()
|
||||||
|
.Tag(kMultiFaceLandmarksTag)
|
||||||
|
.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
|
||||||
|
|
||||||
|
auto multi_face_geometry = absl::make_unique<std::vector<FaceGeometry>>();
|
||||||
|
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
*multi_face_geometry,
|
||||||
|
geometry_pipeline_->EstimateFaceGeometry(
|
||||||
|
multi_face_landmarks, //
|
||||||
|
/*frame_width*/ image_size.first,
|
||||||
|
/*frame_height*/ image_size.second),
|
||||||
|
_ << "Failed to estimate face geometry for multiple faces!");
|
||||||
|
|
||||||
|
cc->Outputs()
|
||||||
|
.Tag(kMultiFaceGeometryTag)
|
||||||
|
.AddPacket(mediapipe::Adopt<std::vector<FaceGeometry>>(
|
||||||
|
multi_face_geometry.release())
|
||||||
|
.At(cc->InputTimestamp()));
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status Close(CalculatorContext* cc) override {
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static absl::StatusOr<GeometryPipelineMetadata> ReadMetadataFromFile(
|
||||||
|
const std::string& metadata_path) {
|
||||||
|
ASSIGN_OR_RETURN(std::string metadata_blob,
|
||||||
|
ReadContentBlobFromFile(metadata_path),
|
||||||
|
_ << "Failed to read a metadata blob from file!");
|
||||||
|
|
||||||
|
GeometryPipelineMetadata metadata;
|
||||||
|
RET_CHECK(metadata.ParseFromString(metadata_blob))
|
||||||
|
<< "Failed to parse a metadata proto from a binary blob!";
|
||||||
|
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
static absl::StatusOr<std::string> ReadContentBlobFromFile(
|
||||||
|
const std::string& unresolved_path) {
|
||||||
|
ASSIGN_OR_RETURN(std::string resolved_path,
|
||||||
|
mediapipe::PathToResourceAsFile(unresolved_path),
|
||||||
|
_ << "Failed to resolve path! Path = " << unresolved_path);
|
||||||
|
|
||||||
|
std::string content_blob;
|
||||||
|
MP_RETURN_IF_ERROR(
|
||||||
|
mediapipe::GetResourceContents(resolved_path, &content_blob))
|
||||||
|
<< "Failed to read content blob! Resolved path = " << resolved_path;
|
||||||
|
|
||||||
|
return content_blob;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<GeometryPipeline> geometry_pipeline_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
using FaceGeometryPipelineCalculator = GeometryPipelineCalculator;
|
||||||
|
|
||||||
|
REGISTER_CALCULATOR(
|
||||||
|
::mediapipe::tasks::vision::face_geometry::FaceGeometryPipelineCalculator);
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,27 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe.tasks.vision.face_geometry;
|
||||||
|
|
||||||
|
import "mediapipe/framework/calculator_options.proto";
|
||||||
|
|
||||||
|
message FaceGeometryPipelineCalculatorOptions {
|
||||||
|
extend mediapipe.CalculatorOptions {
|
||||||
|
optional FaceGeometryPipelineCalculatorOptions ext = 512499200;
|
||||||
|
}
|
||||||
|
|
||||||
|
optional string metadata_path = 1;
|
||||||
|
}
|
59
mediapipe/tasks/cc/vision/face_geometry/data/BUILD
Normal file
59
mediapipe/tasks/cc/vision/face_geometry/data/BUILD
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
encode_binary_proto(
|
||||||
|
name = "geometry_pipeline_metadata_detection",
|
||||||
|
input = "geometry_pipeline_metadata_detection.pbtxt",
|
||||||
|
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
|
||||||
|
output = "geometry_pipeline_metadata_detection.binarypb",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
encode_binary_proto(
|
||||||
|
name = "geometry_pipeline_metadata_landmarks",
|
||||||
|
input = "geometry_pipeline_metadata_landmarks.pbtxt",
|
||||||
|
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
|
||||||
|
output = "geometry_pipeline_metadata_landmarks.binarypb",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from
|
||||||
|
# the `geometry_pipeline_metadata_landmarks.pbtxt` definition.
|
||||||
|
encode_binary_proto(
|
||||||
|
name = "geometry_pipeline_metadata",
|
||||||
|
input = "geometry_pipeline_metadata_landmarks.pbtxt",
|
||||||
|
message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
|
||||||
|
output = "geometry_pipeline_metadata.binarypb",
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# These canonical face model files are not meant to be used in runtime, but rather for asset
|
||||||
|
# creation and/or reference.
|
||||||
|
exports_files([
|
||||||
|
"canonical_face_model.fbx",
|
||||||
|
"canonical_face_model.obj",
|
||||||
|
"canonical_face_model_uv_visualization.png",
|
||||||
|
])
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
After Width: | Height: | Size: 731 KiB |
|
@ -0,0 +1,78 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
input_source: FACE_DETECTION_PIPELINE
|
||||||
|
procrustes_landmark_basis { landmark_id: 0 weight: 1.0 }
|
||||||
|
procrustes_landmark_basis { landmark_id: 1 weight: 1.0 }
|
||||||
|
procrustes_landmark_basis { landmark_id: 2 weight: 1.0 }
|
||||||
|
procrustes_landmark_basis { landmark_id: 3 weight: 1.0 }
|
||||||
|
procrustes_landmark_basis { landmark_id: 4 weight: 1.0 }
|
||||||
|
procrustes_landmark_basis { landmark_id: 5 weight: 1.0 }
|
||||||
|
# NOTE: the triangular topology of the face meshes is only useful when derived
|
||||||
|
# from the 468 face landmarks, not from the 6 face detection landmarks
|
||||||
|
# (keypoints). The latter don't cover the entire face and this mesh is
|
||||||
|
# defined here only to comply with the API. It should be considered as
|
||||||
|
# a placeholder and/or for debugging purposes.
|
||||||
|
#
|
||||||
|
# Use the face geometry derived from the face detection landmarks
|
||||||
|
# (keypoints) for the face pose transformation matrix, not the mesh.
|
||||||
|
canonical_mesh: {
|
||||||
|
vertex_type: VERTEX_PT
|
||||||
|
primitive_type: TRIANGLE
|
||||||
|
vertex_buffer: -3.1511454582214355
|
||||||
|
vertex_buffer: 2.6246179342269897
|
||||||
|
vertex_buffer: 3.4656630754470825
|
||||||
|
vertex_buffer: 0.349575996398926
|
||||||
|
vertex_buffer: 0.38137748837470997
|
||||||
|
vertex_buffer: 3.1511454582214355
|
||||||
|
vertex_buffer: 2.6246179342269897
|
||||||
|
vertex_buffer: 3.4656630754470825
|
||||||
|
vertex_buffer: 0.650443494319916
|
||||||
|
vertex_buffer: 0.38137999176979054
|
||||||
|
vertex_buffer: 0.0
|
||||||
|
vertex_buffer: -1.126865029335022
|
||||||
|
vertex_buffer: 7.475604057312012
|
||||||
|
vertex_buffer: 0.500025987625122
|
||||||
|
vertex_buffer: 0.547487020492554
|
||||||
|
vertex_buffer: 0.0
|
||||||
|
vertex_buffer: -4.304508209228516
|
||||||
|
vertex_buffer: 4.162498950958252
|
||||||
|
vertex_buffer: 0.499989986419678
|
||||||
|
vertex_buffer: 0.694203019142151
|
||||||
|
vertex_buffer: -7.664182186126709
|
||||||
|
vertex_buffer: 0.673132002353668
|
||||||
|
vertex_buffer: -2.435867071151733
|
||||||
|
vertex_buffer: 0.007561000064015
|
||||||
|
vertex_buffer: 0.480777025222778
|
||||||
|
vertex_buffer: 7.664182186126709
|
||||||
|
vertex_buffer: 0.673132002353668
|
||||||
|
vertex_buffer: -2.435867071151733
|
||||||
|
vertex_buffer: 0.992439985275269
|
||||||
|
vertex_buffer: 0.480777025222778
|
||||||
|
index_buffer: 0
|
||||||
|
index_buffer: 1
|
||||||
|
index_buffer: 2
|
||||||
|
index_buffer: 1
|
||||||
|
index_buffer: 5
|
||||||
|
index_buffer: 2
|
||||||
|
index_buffer: 4
|
||||||
|
index_buffer: 0
|
||||||
|
index_buffer: 2
|
||||||
|
index_buffer: 4
|
||||||
|
index_buffer: 2
|
||||||
|
index_buffer: 3
|
||||||
|
index_buffer: 2
|
||||||
|
index_buffer: 5
|
||||||
|
index_buffer: 3
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
80
mediapipe/tasks/cc/vision/face_geometry/libs/BUILD
Normal file
80
mediapipe/tasks/cc/vision/face_geometry/libs/BUILD
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "geometry_pipeline",
|
||||||
|
srcs = ["geometry_pipeline.cc"],
|
||||||
|
hdrs = ["geometry_pipeline.h"],
|
||||||
|
deps = [
|
||||||
|
":mesh_3d_utils",
|
||||||
|
":procrustes_solver",
|
||||||
|
":validation_utils",
|
||||||
|
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||||
|
"//mediapipe/framework/formats:matrix",
|
||||||
|
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/framework/port:statusor",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
|
||||||
|
"@com_google_absl//absl/memory",
|
||||||
|
"@eigen_archive//:eigen3",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "mesh_3d_utils",
|
||||||
|
srcs = ["mesh_3d_utils.cc"],
|
||||||
|
hdrs = ["mesh_3d_utils.h"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:statusor",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "procrustes_solver",
|
||||||
|
srcs = ["procrustes_solver.cc"],
|
||||||
|
hdrs = ["procrustes_solver.h"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/framework/port:statusor",
|
||||||
|
"@com_google_absl//absl/memory",
|
||||||
|
"@eigen_archive//:eigen3",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "validation_utils",
|
||||||
|
srcs = ["validation_utils.cc"],
|
||||||
|
hdrs = ["validation_utils.h"],
|
||||||
|
deps = [
|
||||||
|
":mesh_3d_utils",
|
||||||
|
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
|
||||||
|
"//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,471 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "Eigen/Core"
|
||||||
|
#include "absl/memory/memory.h"
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/matrix.h"
|
||||||
|
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/framework/port/status_macros.h"
|
||||||
|
#include "mediapipe/framework/port/statusor.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct PerspectiveCameraFrustum {
|
||||||
|
// NOTE: all arguments must be validated prior to calling this constructor.
|
||||||
|
PerspectiveCameraFrustum(const proto::PerspectiveCamera& perspective_camera,
|
||||||
|
int frame_width, int frame_height) {
|
||||||
|
static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;
|
||||||
|
|
||||||
|
const float height_at_near =
|
||||||
|
2.f * perspective_camera.near() *
|
||||||
|
std::tan(0.5f * kDegreesToRadians *
|
||||||
|
perspective_camera.vertical_fov_degrees());
|
||||||
|
|
||||||
|
const float width_at_near = frame_width * height_at_near / frame_height;
|
||||||
|
|
||||||
|
left = -0.5f * width_at_near;
|
||||||
|
right = 0.5f * width_at_near;
|
||||||
|
bottom = -0.5f * height_at_near;
|
||||||
|
top = 0.5f * height_at_near;
|
||||||
|
near = perspective_camera.near();
|
||||||
|
far = perspective_camera.far();
|
||||||
|
}
|
||||||
|
|
||||||
|
float left;
|
||||||
|
float right;
|
||||||
|
float bottom;
|
||||||
|
float top;
|
||||||
|
float near;
|
||||||
|
float far;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ScreenToMetricSpaceConverter {
|
||||||
|
public:
|
||||||
|
ScreenToMetricSpaceConverter(
|
||||||
|
proto::OriginPointLocation origin_point_location, //
|
||||||
|
proto::InputSource input_source, //
|
||||||
|
Eigen::Matrix3Xf&& canonical_metric_landmarks, //
|
||||||
|
Eigen::VectorXf&& landmark_weights, //
|
||||||
|
std::unique_ptr<ProcrustesSolver> procrustes_solver)
|
||||||
|
: origin_point_location_(origin_point_location),
|
||||||
|
input_source_(input_source),
|
||||||
|
canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
|
||||||
|
landmark_weights_(std::move(landmark_weights)),
|
||||||
|
procrustes_solver_(std::move(procrustes_solver)) {}
|
||||||
|
|
||||||
|
// Converts `screen_landmark_list` into `metric_landmark_list` and estimates
|
||||||
|
// the `pose_transform_mat`.
|
||||||
|
//
|
||||||
|
// Here's the algorithm summary:
|
||||||
|
//
|
||||||
|
// (1) Project X- and Y- screen landmark coordinates at the Z near plane.
|
||||||
|
//
|
||||||
|
// (2) Estimate a canonical-to-runtime landmark set scale by running the
|
||||||
|
// Procrustes solver using the screen runtime landmarks.
|
||||||
|
//
|
||||||
|
// On this iteration, screen landmarks are used instead of unprojected
|
||||||
|
// metric landmarks as it is not safe to unproject due to the relative
|
||||||
|
// nature of the input screen landmark Z coordinate.
|
||||||
|
//
|
||||||
|
// (3) Use the canonical-to-runtime scale from (2) to unproject the screen
|
||||||
|
// landmarks. The result is referenced as "intermediate landmarks" because
|
||||||
|
// they are the first estimation of the resuling metric landmarks, but are
|
||||||
|
// not quite there yet.
|
||||||
|
//
|
||||||
|
// (4) Estimate a canonical-to-runtime landmark set scale by running the
|
||||||
|
// Procrustes solver using the intermediate runtime landmarks.
|
||||||
|
//
|
||||||
|
// (5) Use the product of the scale factors from (2) and (4) to unproject
|
||||||
|
// the screen landmarks the second time. This is the second and the final
|
||||||
|
// estimation of the metric landmarks.
|
||||||
|
//
|
||||||
|
// (6) Multiply each of the metric landmarks by the inverse pose
|
||||||
|
// transformation matrix to align the runtime metric face landmarks with
|
||||||
|
// the canonical metric face landmarks.
|
||||||
|
//
|
||||||
|
// Note: the input screen landmarks are in the left-handed coordinate system,
|
||||||
|
// however any metric landmarks - including the canonical metric
|
||||||
|
// landmarks, the final runtime metric landmarks and any intermediate
|
||||||
|
// runtime metric landmarks - are in the right-handed coordinate system.
|
||||||
|
//
|
||||||
|
// To keep the logic correct, the landmark set handedness is changed any
|
||||||
|
// time the screen-to-metric semantic barrier is passed.
|
||||||
|
absl::Status Convert(
|
||||||
|
const mediapipe::NormalizedLandmarkList& screen_landmark_list, //
|
||||||
|
const PerspectiveCameraFrustum& pcf, //
|
||||||
|
mediapipe::LandmarkList& metric_landmark_list, //
|
||||||
|
Eigen::Matrix4f& pose_transform_mat) const {
|
||||||
|
RET_CHECK_EQ(screen_landmark_list.landmark_size(),
|
||||||
|
canonical_metric_landmarks_.cols())
|
||||||
|
<< "The number of landmarks doesn't match the number passed upon "
|
||||||
|
"initialization!";
|
||||||
|
|
||||||
|
Eigen::Matrix3Xf screen_landmarks;
|
||||||
|
ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);
|
||||||
|
|
||||||
|
ProjectXY(pcf, screen_landmarks);
|
||||||
|
const float depth_offset = screen_landmarks.row(2).mean();
|
||||||
|
|
||||||
|
// 1st iteration: don't unproject XY because it's unsafe to do so due to
|
||||||
|
// the relative nature of the Z coordinate. Instead, run the
|
||||||
|
// first estimation on the projected XY and use that scale to
|
||||||
|
// unproject for the 2nd iteration.
|
||||||
|
Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
|
||||||
|
ChangeHandedness(intermediate_landmarks);
|
||||||
|
|
||||||
|
ASSIGN_OR_RETURN(const float first_iteration_scale,
|
||||||
|
EstimateScale(intermediate_landmarks),
|
||||||
|
_ << "Failed to estimate first iteration scale!");
|
||||||
|
|
||||||
|
// 2nd iteration: unproject XY using the scale from the 1st iteration.
|
||||||
|
intermediate_landmarks = screen_landmarks;
|
||||||
|
MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
|
||||||
|
intermediate_landmarks);
|
||||||
|
UnprojectXY(pcf, intermediate_landmarks);
|
||||||
|
ChangeHandedness(intermediate_landmarks);
|
||||||
|
|
||||||
|
// For face detection input landmarks, re-write Z-coord from the canonical
|
||||||
|
// landmarks.
|
||||||
|
if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
|
||||||
|
Eigen::Matrix4f intermediate_pose_transform_mat;
|
||||||
|
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||||
|
canonical_metric_landmarks_, intermediate_landmarks,
|
||||||
|
landmark_weights_, intermediate_pose_transform_mat))
|
||||||
|
<< "Failed to estimate pose transform matrix!";
|
||||||
|
|
||||||
|
intermediate_landmarks.row(2) =
|
||||||
|
(intermediate_pose_transform_mat *
|
||||||
|
canonical_metric_landmarks_.colwise().homogeneous())
|
||||||
|
.row(2);
|
||||||
|
}
|
||||||
|
ASSIGN_OR_RETURN(const float second_iteration_scale,
|
||||||
|
EstimateScale(intermediate_landmarks),
|
||||||
|
_ << "Failed to estimate second iteration scale!");
|
||||||
|
|
||||||
|
// Use the total scale to unproject the screen landmarks.
|
||||||
|
const float total_scale = first_iteration_scale * second_iteration_scale;
|
||||||
|
MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
|
||||||
|
UnprojectXY(pcf, screen_landmarks);
|
||||||
|
ChangeHandedness(screen_landmarks);
|
||||||
|
|
||||||
|
// At this point, screen landmarks are converted into metric landmarks.
|
||||||
|
Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;
|
||||||
|
|
||||||
|
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||||
|
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
|
||||||
|
pose_transform_mat))
|
||||||
|
<< "Failed to estimate pose transform matrix!";
|
||||||
|
|
||||||
|
// For face detection input landmarks, re-write Z-coord from the canonical
|
||||||
|
// landmarks and run the pose transform estimation again.
|
||||||
|
if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
|
||||||
|
metric_landmarks.row(2) =
|
||||||
|
(pose_transform_mat *
|
||||||
|
canonical_metric_landmarks_.colwise().homogeneous())
|
||||||
|
.row(2);
|
||||||
|
|
||||||
|
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||||
|
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
|
||||||
|
pose_transform_mat))
|
||||||
|
<< "Failed to estimate pose transform matrix!";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiply each of the metric landmarks by the inverse pose
|
||||||
|
// transformation matrix to align the runtime metric face landmarks with
|
||||||
|
// the canonical metric face landmarks.
|
||||||
|
metric_landmarks = (pose_transform_mat.inverse() *
|
||||||
|
metric_landmarks.colwise().homogeneous())
|
||||||
|
.topRows(3);
|
||||||
|
|
||||||
|
ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void ProjectXY(const PerspectiveCameraFrustum& pcf,
|
||||||
|
Eigen::Matrix3Xf& landmarks) const {
|
||||||
|
float x_scale = pcf.right - pcf.left;
|
||||||
|
float y_scale = pcf.top - pcf.bottom;
|
||||||
|
float x_translation = pcf.left;
|
||||||
|
float y_translation = pcf.bottom;
|
||||||
|
|
||||||
|
if (origin_point_location_ == proto::OriginPointLocation::TOP_LEFT_CORNER) {
|
||||||
|
landmarks.row(1) = 1.f - landmarks.row(1).array();
|
||||||
|
}
|
||||||
|
|
||||||
|
landmarks =
|
||||||
|
landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
|
||||||
|
landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
|
||||||
|
Eigen::Matrix4f transform_mat;
|
||||||
|
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
|
||||||
|
canonical_metric_landmarks_, landmarks, landmark_weights_,
|
||||||
|
transform_mat))
|
||||||
|
<< "Failed to estimate canonical-to-runtime landmark set transform!";
|
||||||
|
|
||||||
|
return transform_mat.col(0).norm();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
|
||||||
|
float depth_offset, float scale,
|
||||||
|
Eigen::Matrix3Xf& landmarks) {
|
||||||
|
landmarks.row(2) =
|
||||||
|
(landmarks.array().row(2) - depth_offset + pcf.near) / scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
|
||||||
|
Eigen::Matrix3Xf& landmarks) {
|
||||||
|
landmarks.row(0) =
|
||||||
|
landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
|
||||||
|
landmarks.row(1) =
|
||||||
|
landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
|
||||||
|
landmarks.row(2) *= -1.f;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ConvertLandmarkListToEigenMatrix(
|
||||||
|
const mediapipe::NormalizedLandmarkList& landmark_list,
|
||||||
|
Eigen::Matrix3Xf& eigen_matrix) {
|
||||||
|
eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
|
||||||
|
for (int i = 0; i < landmark_list.landmark_size(); ++i) {
|
||||||
|
const auto& landmark = landmark_list.landmark(i);
|
||||||
|
eigen_matrix(0, i) = landmark.x();
|
||||||
|
eigen_matrix(1, i) = landmark.y();
|
||||||
|
eigen_matrix(2, i) = landmark.z();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ConvertEigenMatrixToLandmarkList(
|
||||||
|
const Eigen::Matrix3Xf& eigen_matrix,
|
||||||
|
mediapipe::LandmarkList& landmark_list) {
|
||||||
|
landmark_list.Clear();
|
||||||
|
|
||||||
|
for (int i = 0; i < eigen_matrix.cols(); ++i) {
|
||||||
|
auto& landmark = *landmark_list.add_landmark();
|
||||||
|
landmark.set_x(eigen_matrix(0, i));
|
||||||
|
landmark.set_y(eigen_matrix(1, i));
|
||||||
|
landmark.set_z(eigen_matrix(2, i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const proto::OriginPointLocation origin_point_location_;
|
||||||
|
const proto::InputSource input_source_;
|
||||||
|
Eigen::Matrix3Xf canonical_metric_landmarks_;
|
||||||
|
Eigen::VectorXf landmark_weights_;
|
||||||
|
|
||||||
|
std::unique_ptr<ProcrustesSolver> procrustes_solver_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GeometryPipelineImpl : public GeometryPipeline {
|
||||||
|
public:
|
||||||
|
GeometryPipelineImpl(
|
||||||
|
const proto::PerspectiveCamera& perspective_camera, //
|
||||||
|
const proto::Mesh3d& canonical_mesh, //
|
||||||
|
uint32_t canonical_mesh_vertex_size, //
|
||||||
|
uint32_t canonical_mesh_num_vertices,
|
||||||
|
uint32_t canonical_mesh_vertex_position_offset,
|
||||||
|
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
|
||||||
|
: perspective_camera_(perspective_camera),
|
||||||
|
canonical_mesh_(canonical_mesh),
|
||||||
|
canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
|
||||||
|
canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
|
||||||
|
canonical_mesh_vertex_position_offset_(
|
||||||
|
canonical_mesh_vertex_position_offset),
|
||||||
|
space_converter_(std::move(space_converter)) {}
|
||||||
|
|
||||||
|
absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
|
||||||
|
const std::vector<mediapipe::NormalizedLandmarkList>&
|
||||||
|
multi_face_landmarks,
|
||||||
|
int frame_width, int frame_height) const override {
|
||||||
|
MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
|
||||||
|
<< "Invalid frame dimensions!";
|
||||||
|
|
||||||
|
// Create a perspective camera frustum to be shared for geometry estimation
|
||||||
|
// per each face.
|
||||||
|
PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
|
||||||
|
frame_height);
|
||||||
|
|
||||||
|
std::vector<proto::FaceGeometry> multi_face_geometry;
|
||||||
|
|
||||||
|
// From this point, the meaning of "face landmarks" is clarified further as
|
||||||
|
// "screen face landmarks". This is done do distinguish from "metric face
|
||||||
|
// landmarks" that are derived during the face geometry estimation process.
|
||||||
|
for (const mediapipe::NormalizedLandmarkList& screen_face_landmarks :
|
||||||
|
multi_face_landmarks) {
|
||||||
|
// Having a too compact screen landmark list will result in numerical
|
||||||
|
// instabilities, therefore such faces are filtered.
|
||||||
|
if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the screen landmarks into the metric landmarks and get the pose
|
||||||
|
// transformation matrix.
|
||||||
|
mediapipe::LandmarkList metric_face_landmarks;
|
||||||
|
Eigen::Matrix4f pose_transform_mat;
|
||||||
|
MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
|
||||||
|
metric_face_landmarks,
|
||||||
|
pose_transform_mat))
|
||||||
|
<< "Failed to convert landmarks from the screen to the metric space!";
|
||||||
|
|
||||||
|
// Pack geometry data for this face.
|
||||||
|
proto::FaceGeometry face_geometry;
|
||||||
|
proto::Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
|
||||||
|
// Copy the canonical face mesh as the face geometry mesh.
|
||||||
|
mutable_mesh->CopyFrom(canonical_mesh_);
|
||||||
|
// Replace XYZ vertex mesh coodinates with the metric landmark positions.
|
||||||
|
for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
|
||||||
|
uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
|
||||||
|
canonical_mesh_vertex_position_offset_;
|
||||||
|
|
||||||
|
mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
|
||||||
|
metric_face_landmarks.landmark(i).x());
|
||||||
|
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
|
||||||
|
metric_face_landmarks.landmark(i).y());
|
||||||
|
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
|
||||||
|
metric_face_landmarks.landmark(i).z());
|
||||||
|
}
|
||||||
|
// Populate the face pose transformation matrix.
|
||||||
|
mediapipe::MatrixDataProtoFromMatrix(
|
||||||
|
pose_transform_mat, face_geometry.mutable_pose_transform_matrix());
|
||||||
|
|
||||||
|
multi_face_geometry.push_back(face_geometry);
|
||||||
|
}
|
||||||
|
|
||||||
|
return multi_face_geometry;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static bool IsScreenLandmarkListTooCompact(
|
||||||
|
const mediapipe::NormalizedLandmarkList& screen_landmarks) {
|
||||||
|
float mean_x = 0.f;
|
||||||
|
float mean_y = 0.f;
|
||||||
|
for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
|
||||||
|
const auto& landmark = screen_landmarks.landmark(i);
|
||||||
|
mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
|
||||||
|
mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
float max_sq_dist = 0.f;
|
||||||
|
for (const auto& landmark : screen_landmarks.landmark()) {
|
||||||
|
const float d_x = landmark.x() - mean_x;
|
||||||
|
const float d_y = landmark.y() - mean_y;
|
||||||
|
max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
|
||||||
|
return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
const proto::PerspectiveCamera perspective_camera_;
|
||||||
|
const proto::Mesh3d canonical_mesh_;
|
||||||
|
const uint32_t canonical_mesh_vertex_size_;
|
||||||
|
const uint32_t canonical_mesh_num_vertices_;
|
||||||
|
const uint32_t canonical_mesh_vertex_position_offset_;
|
||||||
|
|
||||||
|
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Builds a `GeometryPipeline` from a validated `environment` and `metadata`.
//
// Returns an error if either argument fails validation, if the canonical mesh
// lacks the `POSITION` or `TEX_COORD` vertex component, or if a Procrustes
// landmark basis entry refers to a landmark outside of the canonical mesh.
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
    const proto::Environment& environment,
    const proto::GeometryPipelineMetadata& metadata) {
  MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
      << "Invalid environment!";
  MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
      << "Invalid geometry pipeline metadata!";

  const auto& canonical_mesh = metadata.canonical_mesh();
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION))
      << "Canonical face mesh must have the `POSITION` vertex component!";
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::TEX_COORD))
      << "Canonical face mesh must have the `TEX_COORD` vertex component!";

  const uint32_t canonical_mesh_vertex_size =
      GetVertexSize(canonical_mesh.vertex_type());
  const uint32_t canonical_mesh_num_vertices =
      canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
  // Propagate a lookup failure as a status instead of calling `.value()` on a
  // possibly non-OK result.
  ASSIGN_OR_RETURN(
      const uint32_t canonical_mesh_vertex_position_offset,
      GetVertexComponentOffset(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION),
      _ << "Failed to get the `POSITION` vertex component offset!");

  // Put the Procrustes landmark basis into Eigen matrices for an easier access.
  Eigen::Matrix3Xf canonical_metric_landmarks =
      Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
  Eigen::VectorXf landmark_weights =
      Eigen::VectorXf::Zero(canonical_mesh_num_vertices);

  // Copy the XYZ position of every canonical mesh vertex into a column.
  for (uint32_t i = 0; i < canonical_mesh_num_vertices; ++i) {
    const uint32_t vertex_buffer_offset =
        canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;

    canonical_metric_landmarks(0, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset);
    canonical_metric_landmarks(1, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
    canonical_metric_landmarks(2, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
  }

  // Landmarks that are not part of the Procrustes basis keep a zero weight.
  for (const proto::WeightedLandmarkRef& wlr :
       metadata.procrustes_landmark_basis()) {
    const uint32_t landmark_id = wlr.landmark_id();
    // Defensive bound check: Eigen does not bounds-check in release builds.
    // NOTE(review): presumably `ValidateGeometryPipelineMetadata` already
    // enforces this - confirm against `validation_utils.cc`.
    RET_CHECK_LT(landmark_id, canonical_mesh_num_vertices)
        << "Procrustes landmark basis refers to a landmark ID outside of the "
           "canonical face mesh!";
    landmark_weights(landmark_id) = wlr.weight();
  }

  // A `DEFAULT` input source is treated as the face landmark pipeline.
  std::unique_ptr<GeometryPipeline> result =
      absl::make_unique<GeometryPipelineImpl>(
          environment.perspective_camera(), canonical_mesh,
          canonical_mesh_vertex_size, canonical_mesh_num_vertices,
          canonical_mesh_vertex_position_offset,
          absl::make_unique<ScreenToMetricSpaceConverter>(
              environment.origin_point_location(),
              metadata.input_source() == proto::InputSource::DEFAULT
                  ? proto::InputSource::FACE_LANDMARK_PIPELINE
                  : metadata.input_source(),
              std::move(canonical_metric_landmarks),
              std::move(landmark_weights),
              CreateFloatPrecisionProcrustesSolver()));

  return result;
}
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,69 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
||||||
|
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||||
|
#include "mediapipe/framework/port/statusor.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
|
||||||
|
// Encapsulates a stateless estimator of facial geometry in a Metric space based
|
||||||
|
// on the normalized face landmarks in the Screen space.
|
||||||
|
class GeometryPipeline {
|
||||||
|
public:
|
||||||
|
virtual ~GeometryPipeline() = default;
|
||||||
|
|
||||||
|
// Estimates geometry data for multiple faces.
|
||||||
|
//
|
||||||
|
// Returns an error status if any of the passed arguments is invalid.
|
||||||
|
//
|
||||||
|
// The result includes face geometry data for a subset of the input faces,
|
||||||
|
// however geometry data for some faces might be missing. This may happen if
|
||||||
|
// it'd be unstable to estimate the facial geometry based on a corresponding
|
||||||
|
// face landmark list for any reason (for example, if the landmark list is too
|
||||||
|
// compact).
|
||||||
|
//
|
||||||
|
// Each face landmark list must have the same number of landmarks as was
|
||||||
|
// passed upon initialization via the canonical face mesh (as a part of the
|
||||||
|
// geometry pipeline metadata).
|
||||||
|
//
|
||||||
|
// Both `frame_width` and `frame_height` must be positive.
|
||||||
|
virtual absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
|
||||||
|
const std::vector<mediapipe::NormalizedLandmarkList>&
|
||||||
|
multi_face_landmarks,
|
||||||
|
int frame_width, int frame_height) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates an instance of `GeometryPipeline`.
|
||||||
|
//
|
||||||
|
// Both `environment` and `metadata` must be valid (for details, please refer to
|
||||||
|
// the proto message definition comments and/or `validation_utils.h/cc`).
|
||||||
|
//
|
||||||
|
// Canonical face mesh (defined as a part of `metadata`) must have the
|
||||||
|
// `POSITION` and the `TEX_COORD` vertex components.
|
||||||
|
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
|
||||||
|
const proto::Environment& environment,
|
||||||
|
const proto::GeometryPipelineMetadata& metadata);
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
103
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.cc
Normal file
103
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.cc
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/statusor.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
bool HasVertexComponentVertexPT(VertexComponent vertex_component) {
|
||||||
|
switch (vertex_component) {
|
||||||
|
case VertexComponent::POSITION:
|
||||||
|
case VertexComponent::TEX_COORD:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) {
|
||||||
|
switch (vertex_component) {
|
||||||
|
case VertexComponent::POSITION:
|
||||||
|
return 3;
|
||||||
|
case VertexComponent::TEX_COORD:
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) {
|
||||||
|
switch (vertex_component) {
|
||||||
|
case VertexComponent::POSITION:
|
||||||
|
return 0;
|
||||||
|
case VertexComponent::TEX_COORD:
|
||||||
|
return GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Returns the total size of a single vertex of `vertex_type`, i.e. the sum of
// the sizes of all the components that vertex type is made of.
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type) {
  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT: {
      const uint32_t position_size =
          GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
      const uint32_t tex_coord_size =
          GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD);
      return position_size + tex_coord_size;
    }
  }
}
|
||||||
|
|
||||||
|
// Returns the size of one primitive of `primitive_type` (3 for a triangle).
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type) {
  switch (primitive_type) {
    case proto::Mesh3d::TRIANGLE: {
      constexpr std::size_t kTriangleSize = 3;
      return kTriangleSize;
    }
  }
}
|
||||||
|
|
||||||
|
// Tells whether vertices of `vertex_type` carry `vertex_component`, by
// dispatching to the helper dedicated to that vertex type.
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
                        VertexComponent vertex_component) {
  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT: {
      const bool is_present = HasVertexComponentVertexPT(vertex_component);
      return is_present;
    }
  }
}
|
||||||
|
|
||||||
|
// Computes the offset of `vertex_component` inside a vertex of `vertex_type`.
//
// Returns an error status if the given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentOffset(
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
  // Check the component against the vertex type that was actually passed in,
  // rather than unconditionally against the VERTEX_PT layout.
  RET_CHECK(HasVertexComponent(vertex_type, vertex_component))
      << "A given vertex type doesn't have the requested component!";

  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT:
      return GetVertexComponentOffsetVertexPT(vertex_component);
  }
}
|
||||||
|
|
||||||
|
// Computes the size of `vertex_component` inside a vertex of `vertex_type`.
//
// Returns an error status if the given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentSize(
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
  // Check the component against the vertex type that was actually passed in,
  // rather than unconditionally against the VERTEX_PT layout.
  RET_CHECK(HasVertexComponent(vertex_type, vertex_component))
      << "A given vertex type doesn't have the requested component!";

  switch (vertex_type) {
    case proto::Mesh3d::VERTEX_PT:
      return GetVertexComponentSizeVertexPT(vertex_component);
  }
}
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
51
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h
Normal file
51
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
||||||
|
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/port/statusor.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
|
||||||
|
// Components a mesh vertex may carry. Whether a particular vertex type has a
// given component can be queried via `HasVertexComponent`.
enum class VertexComponent { POSITION, TEX_COORD };
|
||||||
|
|
||||||
|
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type);
|
||||||
|
|
||||||
|
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type);
|
||||||
|
|
||||||
|
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
|
||||||
|
VertexComponent vertex_component);
|
||||||
|
|
||||||
|
// Computes the vertex component offset.
|
||||||
|
//
|
||||||
|
// Returns an error status if a given vertex type doesn't have the requested
|
||||||
|
// component.
|
||||||
|
absl::StatusOr<uint32_t> GetVertexComponentOffset(
|
||||||
|
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
|
||||||
|
|
||||||
|
// Computes the vertex component size.
|
||||||
|
//
|
||||||
|
// Returns an error status if a given vertex type doesn't have the requested
|
||||||
|
// component.
|
||||||
|
absl::StatusOr<uint32_t> GetVertexComponentSize(
|
||||||
|
proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
|
@ -0,0 +1,264 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
#include "absl/memory/memory.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/framework/port/status_macros.h"
|
||||||
|
#include "mediapipe/framework/port/statusor.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class FloatPrecisionProcrustesSolver : public ProcrustesSolver {
|
||||||
|
public:
|
||||||
|
FloatPrecisionProcrustesSolver() = default;
|
||||||
|
|
||||||
|
absl::Status SolveWeightedOrthogonalProblem(
    const Eigen::Matrix3Xf& source_points,  //
    const Eigen::Matrix3Xf& target_points,  //
    const Eigen::VectorXf& point_weights,
    Eigen::Matrix4f& transform_mat) const override {
  // Reject malformed point clouds and weights before doing any math.
  MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points))
      << "Failed to validate weighted orthogonal problem input points!";
  MP_RETURN_IF_ERROR(
      ValidatePointWeights(source_points.cols(), point_weights))
      << "Failed to validate weighted orthogonal problem point weights!";

  // The internal solver operates on the square roots of the point weights.
  const Eigen::VectorXf weight_roots = ExtractSquareRoot(point_weights);

  // Attempt the actual WEOP solve; `transform_mat` receives the result.
  MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem(
      source_points, target_points, weight_roots, transform_mat))
      << "Failed to solve the WEOP problem!";

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr float kAbsoluteErrorEps = 1e-9f;
|
||||||
|
|
||||||
|
// Checks that the source point cloud is non-empty and that both point clouds
// hold the same number of points (one point per matrix column).
static absl::Status ValidateInputPoints(
    const Eigen::Matrix3Xf& source_points,
    const Eigen::Matrix3Xf& target_points) {
  // An empty source cloud gives nothing to align.
  RET_CHECK_GT(source_points.cols(), 0)
      << "The number of source points must be positive!";

  // Points are matched column by column, so the counts have to agree.
  RET_CHECK_EQ(source_points.cols(), target_points.cols())
      << "The number of source and target points must be equal!";

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Checks that there is exactly one weight per point, that no weight is
// negative and that the weights add up to more than `kAbsoluteErrorEps`.
static absl::Status ValidatePointWeights(
    int num_points, const Eigen::VectorXf& point_weights) {
  RET_CHECK_GT(point_weights.size(), 0)
      << "The number of point weights must be positive!";

  RET_CHECK_EQ(point_weights.size(), num_points)
      << "The number of points and point weights must be equal!";

  // Accumulate the total weight while verifying each individual weight.
  float total_weight = 0.f;
  for (int i = 0; i < num_points; ++i) {
    RET_CHECK_GE(point_weights(i), 0.f)
        << "Each point weight must be non-negative!";
    total_weight += point_weights(i);
  }

  RET_CHECK_GT(total_weight, kAbsoluteErrorEps)
      << "The total point weight is too small!";

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Returns a vector whose i-th element is `std::sqrt(point_weights(i))`.
static Eigen::VectorXf ExtractSquareRoot(
    const Eigen::VectorXf& point_weights) {
  // Apply `std::sqrt` element by element; the expression is materialized into
  // a fresh vector on return.
  return point_weights.unaryExpr(
      [](float weight) { return std::sqrt(weight); });
}
|
||||||
|
|
||||||
|
// Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into
|
||||||
|
// a single 4x4 transformation matrix.
|
||||||
|
static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s,
|
||||||
|
const Eigen::Vector3f& t) {
|
||||||
|
Eigen::Matrix4f result = Eigen::Matrix4f::Identity();
|
||||||
|
result.leftCols(3).topRows(3) = r_and_s;
|
||||||
|
result.col(3).topRows(3) = t;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The weighted problem is thoroughly addressed in Section 2.4 of:
|
||||||
|
// D. Akca, Generalized Procrustes analysis and its applications
|
||||||
|
// in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648
|
||||||
|
//
|
||||||
|
// Notable differences in the code presented here are:
|
||||||
|
//
|
||||||
|
// * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q.
|
||||||
|
// Our W_p is diagonal (equal to diag(sqrt_weights^2)),
|
||||||
|
// so we can just set Q = diag(sqrt_weights) instead.
|
||||||
|
//
|
||||||
|
// * In the paper, the problem is presented as
|
||||||
|
// (for W_k = I and W_p = tranposed(Q) Q):
|
||||||
|
// || Q (c A T + j tranposed(t) - B) || -> min.
|
||||||
|
//
|
||||||
|
// We reformulate it as an equivalent minimization of the transpose's
|
||||||
|
// norm:
|
||||||
|
// || (c tranposed(T) tranposed(A) - tranposed(B)) tranposed(Q) || -> min,
|
||||||
|
// where tranposed(A) and tranposed(B) are the source and the target point
|
||||||
|
// clouds, respectively, c tranposed(T) is the rotation+scaling R sought
|
||||||
|
// for, and Q is diag(sqrt_weights).
|
||||||
|
//
|
||||||
|
// Most of the derivations are therefore transposed.
|
||||||
|
//
|
||||||
|
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
|
||||||
|
// return type in order to avoid Eigen memory alignment issues. Details:
|
||||||
|
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
|
||||||
|
static absl::Status InternalSolveWeightedOrthogonalProblem(
|
||||||
|
const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets,
|
||||||
|
const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) {
|
||||||
|
// tranposed(A_w).
|
||||||
|
Eigen::Matrix3Xf weighted_sources =
|
||||||
|
sources.array().rowwise() * sqrt_weights.array().transpose();
|
||||||
|
// tranposed(B_w).
|
||||||
|
Eigen::Matrix3Xf weighted_targets =
|
||||||
|
targets.array().rowwise() * sqrt_weights.array().transpose();
|
||||||
|
|
||||||
|
// w = tranposed(j_w) j_w.
|
||||||
|
float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum();
|
||||||
|
|
||||||
|
// Let C = (j_w tranposed(j_w)) / (tranposed(j_w) j_w).
|
||||||
|
// Note that C = tranposed(C), hence (I - C) = tranposed(I - C).
|
||||||
|
//
|
||||||
|
// tranposed(A_w) C = tranposed(A_w) j_w tranposed(j_w) / w =
|
||||||
|
// (tranposed(A_w) j_w) tranposed(j_w) / w = c_w tranposed(j_w),
|
||||||
|
//
|
||||||
|
// where c_w = tranposed(A_w) j_w / w is a k x 1 vector calculated here:
|
||||||
|
Eigen::Matrix3Xf twice_weighted_sources =
|
||||||
|
weighted_sources.array().rowwise() * sqrt_weights.array().transpose();
|
||||||
|
Eigen::Vector3f source_center_of_mass =
|
||||||
|
twice_weighted_sources.rowwise().sum() / total_weight;
|
||||||
|
// tranposed((I - C) A_w) = tranposed(A_w) (I - C) =
|
||||||
|
// tranposed(A_w) - tranposed(A_w) C = tranposed(A_w) - c_w tranposed(j_w).
|
||||||
|
Eigen::Matrix3Xf centered_weighted_sources =
|
||||||
|
weighted_sources - source_center_of_mass * sqrt_weights.transpose();
|
||||||
|
|
||||||
|
Eigen::Matrix3f rotation;
|
||||||
|
MP_RETURN_IF_ERROR(ComputeOptimalRotation(
|
||||||
|
weighted_targets * centered_weighted_sources.transpose(), rotation))
|
||||||
|
<< "Failed to compute the optimal rotation!";
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
float scale,
|
||||||
|
ComputeOptimalScale(centered_weighted_sources, weighted_sources,
|
||||||
|
weighted_targets, rotation),
|
||||||
|
_ << "Failed to compute the optimal scale!");
|
||||||
|
|
||||||
|
// R = c tranposed(T).
|
||||||
|
Eigen::Matrix3f rotation_and_scale = scale * rotation;
|
||||||
|
|
||||||
|
// Compute optimal translation for the weighted problem.
|
||||||
|
|
||||||
|
// tranposed(B_w - c A_w T) = tranposed(B_w) - R tranposed(A_w) in (54).
|
||||||
|
const auto pointwise_diffs =
|
||||||
|
weighted_targets - rotation_and_scale * weighted_sources;
|
||||||
|
// Multiplication by j_w is a respectively weighted column sum.
|
||||||
|
// (54) from the paper.
|
||||||
|
const auto weighted_pointwise_diffs =
|
||||||
|
pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose();
|
||||||
|
Eigen::Vector3f translation =
|
||||||
|
weighted_pointwise_diffs.rowwise().sum() / total_weight;
|
||||||
|
|
||||||
|
transform_mat = CombineTransformMatrix(rotation_and_scale, translation);
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
// `design_matrix` is a transposed LHS of (51) in the paper.
|
||||||
|
//
|
||||||
|
// Note: the output `rotation` argument is used instead of `StatusOr<>`
|
||||||
|
// return type in order to avoid Eigen memory alignment issues. Details:
|
||||||
|
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
|
||||||
|
static absl::Status ComputeOptimalRotation(
|
||||||
|
const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) {
|
||||||
|
RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps)
|
||||||
|
<< "Design matrix norm is too small!";
|
||||||
|
|
||||||
|
Eigen::JacobiSVD<Eigen::Matrix3f> svd(
|
||||||
|
design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV);
|
||||||
|
|
||||||
|
Eigen::Matrix3f postrotation = svd.matrixU();
|
||||||
|
Eigen::Matrix3f prerotation = svd.matrixV().transpose();
|
||||||
|
|
||||||
|
// Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1),
|
||||||
|
// see "4.6 Constrained orthogonal Procrustes problems"
|
||||||
|
// in the Gower & Dijksterhuis's book "Procrustes Analysis".
|
||||||
|
// We flip the sign of the least singular value along with a column in W.
|
||||||
|
//
|
||||||
|
// Note that now the sum of singular values doesn't work for scale
|
||||||
|
// estimation due to this sign flip.
|
||||||
|
if (postrotation.determinant() * prerotation.determinant() <
|
||||||
|
static_cast<float>(0)) {
|
||||||
|
postrotation.col(2) *= static_cast<float>(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transposed (52) from the paper.
|
||||||
|
rotation = postrotation * prerotation;
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes the uniform scale per (53) from the paper as a ratio of two sums
// of element-wise products. Fails if either the denominator or the resulting
// scale does not exceed `kAbsoluteErrorEps`.
static absl::StatusOr<float> ComputeOptimalScale(
    const Eigen::Matrix3Xf& centered_weighted_sources,
    const Eigen::Matrix3Xf& weighted_sources,
    const Eigen::Matrix3Xf& weighted_targets,
    const Eigen::Matrix3f& rotation) {
  // transposed(T) transposed(A_w) (I - C).
  const auto rotated_sources = rotation * centered_weighted_sources;

  // Use the identity trace(A B) = sum(A * B^T)
  // to avoid building large intermediate matrices (* is Hadamard product).
  // (53) from the paper.
  float numerator = rotated_sources.cwiseProduct(weighted_targets).sum();
  float denominator =
      centered_weighted_sources.cwiseProduct(weighted_sources).sum();

  RET_CHECK_GT(denominator, kAbsoluteErrorEps)
      << "Scale expression denominator is too small!";
  RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps)
      << "Scale is too small!";

  return numerator / denominator;
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver() {
|
||||||
|
return absl::make_unique<FloatPrecisionProcrustesSolver>();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,70 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
||||||
|
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "Eigen/Dense"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
|
||||||
|
// Encapsulates a stateless solver for the Weighted Extended Orthogonal
|
||||||
|
// Procrustes (WEOP) Problem, as defined in Section 2.4 of
|
||||||
|
// https://doi.org/10.3929/ethz-a-004656648.
|
||||||
|
//
|
||||||
|
// Given the source and the target point clouds, the algorithm estimates
|
||||||
|
// a 4x4 transformation matrix featuring the following semantic components:
|
||||||
|
//
|
||||||
|
// * Uniform scale
|
||||||
|
// * Rotation
|
||||||
|
// * Translation
|
||||||
|
//
|
||||||
|
// The matrix maps the source point cloud into the target point cloud minimizing
|
||||||
|
// the Mean Squared Error.
|
||||||
|
class ProcrustesSolver {
|
||||||
|
public:
|
||||||
|
virtual ~ProcrustesSolver() = default;
|
||||||
|
|
||||||
|
// Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem.
|
||||||
|
//
|
||||||
|
// All `source_points`, `target_points` and `point_weights` must define the
|
||||||
|
// same number of points. Elements of `point_weights` must be non-negative.
|
||||||
|
//
|
||||||
|
// A too small diameter of either of the point clouds will likely lead to
|
||||||
|
// numerical instabilities and failure to estimate the transformation.
|
||||||
|
//
|
||||||
|
// A too small point cloud total weight will likely lead to numerical
|
||||||
|
// instabilities and failure to estimate the transformation too.
|
||||||
|
//
|
||||||
|
// Small point coordinate deviation for either of the point cloud will likely
|
||||||
|
// result in a failure as it will make the solution very unstable if possible.
|
||||||
|
//
|
||||||
|
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
|
||||||
|
// return type in order to avoid Eigen memory alignment issues. Details:
|
||||||
|
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
|
||||||
|
virtual absl::Status SolveWeightedOrthogonalProblem(
|
||||||
|
const Eigen::Matrix3Xf& source_points, //
|
||||||
|
const Eigen::Matrix3Xf& target_points, //
|
||||||
|
const Eigen::VectorXf& point_weights, //
|
||||||
|
Eigen::Matrix4f& transform_mat) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver();
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
127
mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.cc
Normal file
127
mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.cc
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||||
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/framework/port/status_macros.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
|
||||||
|
// Validates `perspective_camera`: Near Z must be positive, Far Z must exceed
// Near Z and the vertical FOV must lie inside (0, 180) degrees, each with a
// margin of `1e-9`.
absl::Status ValidatePerspectiveCamera(
    const proto::PerspectiveCamera& perspective_camera) {
  static constexpr float kAbsoluteErrorEps = 1e-9f;

  // Clipping planes: 0 < near < far.
  RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps)
      << "Near Z must be greater than 0 with a margin of 10^{-9}!";
  RET_CHECK_GT(perspective_camera.far(),
               perspective_camera.near() + kAbsoluteErrorEps)
      << "Far Z must be greater than Near Z with a margin of 10^{-9}!";

  // Field of view: 0 < fov < 180 degrees.
  RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps)
      << "Vertical FOV must be positive with a margin of 10^{-9}!";
  RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps,
               180.f)
      << "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}";

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Validates `environment`; at present this means validating its perspective
// camera.
absl::Status ValidateEnvironment(const proto::Environment& environment) {
  const proto::PerspectiveCamera& camera = environment.perspective_camera();
  MP_RETURN_IF_ERROR(ValidatePerspectiveCamera(camera))
      << "Invalid perspective camera!";

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Validates `mesh_3d`.
//
// The vertex buffer size must be a multiple of the vertex size, the index
// buffer size must be a multiple of the primitive size, and every index must
// refer to an existing vertex. Returns an error status otherwise.
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d) {
  const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type());
  // Size of one primitive (not its type), as returned by `GetPrimitiveSize`.
  const std::size_t primitive_size =
      GetPrimitiveSize(mesh_3d.primitive_type());

  RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % vertex_size, 0)
      << "Vertex buffer size must be a multiple of the vertex size!";

  RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_size, 0)
      << "Index buffer size must be a multiple of the primitive size!";

  // Kept unsigned so that the comparison against the (unsigned) indices below
  // does not mix signedness.
  const std::size_t num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
  for (uint32_t idx : mesh_3d.index_buffer()) {
    RET_CHECK_LT(idx, num_vertices)
        << "All mesh indices must refer to an existing vertex!";
  }

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Validates `face_geometry`.
//
// The face mesh must be valid and the pose transformation matrix must be a
// 4x4 matrix (4 rows, 4 columns, 16 packed values). Returns an error status
// otherwise.
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry) {
  MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";

  static constexpr char kInvalid4x4MatrixMessage[] =
      "Pose transformation matrix must be a 4x4 matrix!";

  const mediapipe::MatrixData& pose_transform_matrix =
      face_geometry.pose_transform_matrix();
  RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
  // Check the column count as well; checking `rows()` twice would let a
  // matrix with a wrong number of columns through.
  RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
  RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
      << kInvalid4x4MatrixMessage;

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Validates `metadata`: the canonical mesh must be valid, the Procrustes
// landmark basis must be non-empty, and every basis entry must reference an
// existing canonical mesh vertex and carry a non-negative weight.
absl::Status ValidateGeometryPipelineMetadata(
    const proto::GeometryPipelineMetadata& metadata) {
  const proto::Mesh3d& canonical_mesh = metadata.canonical_mesh();
  MP_RETURN_IF_ERROR(ValidateMesh3d(canonical_mesh))
      << "Invalid canonical mesh!";

  RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
      << "Procrustes landmark basis must be non-empty!";

  // Number of vertices stored in the canonical mesh vertex buffer.
  const int num_vertices = canonical_mesh.vertex_buffer_size() /
                           GetVertexSize(canonical_mesh.vertex_type());

  for (const proto::WeightedLandmarkRef& wlr :
       metadata.procrustes_landmark_basis()) {
    RET_CHECK_LT(wlr.landmark_id(), num_vertices)
        << "All Procrustes basis indices must refer to an existing canonical "
           "mesh vertex!";
    RET_CHECK_GE(wlr.weight(), 0.f)
        << "All Procrustes basis landmarks must have a non-negative weight!";
  }

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Validates the frame dimensions: both `frame_width` and `frame_height` must
// be strictly positive. The width is checked first.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
  RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
  RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
|
@ -0,0 +1,70 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
||||||
|
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
||||||
|
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
|
||||||
|
|
||||||
|
namespace mediapipe::tasks::vision::face_geometry {
|
||||||
|
|
||||||
|
// Validates `perspective_camera`.
|
||||||
|
//
|
||||||
|
// Near Z must be greater than 0 with a margin of `1e-9`.
|
||||||
|
// Far Z must be greater than Near Z with a margin of `1e-9`.
|
||||||
|
// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range
|
||||||
|
// edges.
|
||||||
|
absl::Status ValidatePerspectiveCamera(
|
||||||
|
const proto::PerspectiveCamera& perspective_camera);
|
||||||
|
|
||||||
|
// Validates `environment`.
|
||||||
|
//
|
||||||
|
// Environment's perspective camera must be valid.
|
||||||
|
absl::Status ValidateEnvironment(const proto::Environment& environment);
|
||||||
|
|
||||||
|
// Validates `mesh_3d`.
|
||||||
|
//
|
||||||
|
// Mesh vertex buffer size must be a multiple of the vertex size.
|
||||||
|
// Mesh index buffer size must be a multiple of the primitive size.
|
||||||
|
// All mesh indices must reference an existing mesh vertex.
|
||||||
|
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d);
|
||||||
|
|
||||||
|
// Validates `face_geometry`.
|
||||||
|
//
|
||||||
|
// Face mesh must be valid.
|
||||||
|
// Face pose transformation matrix must be a 4x4 matrix.
|
||||||
|
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry);
|
||||||
|
|
||||||
|
// Validates `metadata`.
|
||||||
|
//
|
||||||
|
// Canonical face mesh must be valid.
|
||||||
|
// Procrustes landmark basis must be non-empty.
|
||||||
|
// All Procrustes basis indices must reference an existing canonical mesh
|
||||||
|
// vertex.
|
||||||
|
// All Procrustes basis landmarks must have a non-negative weight.
|
||||||
|
absl::Status ValidateGeometryPipelineMetadata(
|
||||||
|
const proto::GeometryPipelineMetadata& metadata);
|
||||||
|
|
||||||
|
// Validates frame dimensions.
|
||||||
|
//
|
||||||
|
// Both frame width and frame height must be positive.
|
||||||
|
absl::Status ValidateFrameDimensions(int frame_width, int frame_height);
|
||||||
|
|
||||||
|
} // namespace mediapipe::tasks::vision::face_geometry
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
46
mediapipe/tasks/cc/vision/face_geometry/proto/BUILD
Normal file
46
mediapipe/tasks/cc/vision/face_geometry/proto/BUILD
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
mediapipe_proto_library(
|
||||||
|
name = "environment_proto",
|
||||||
|
srcs = ["environment.proto"],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_proto_library(
|
||||||
|
name = "face_geometry_proto",
|
||||||
|
srcs = ["face_geometry.proto"],
|
||||||
|
deps = [
|
||||||
|
":mesh_3d_proto",
|
||||||
|
"//mediapipe/framework/formats:matrix_data_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_proto_library(
|
||||||
|
name = "geometry_pipeline_metadata_proto",
|
||||||
|
srcs = ["geometry_pipeline_metadata.proto"],
|
||||||
|
deps = [
|
||||||
|
":mesh_3d_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
mediapipe_proto_library(
|
||||||
|
name = "mesh_3d_proto",
|
||||||
|
srcs = ["mesh_3d.proto"],
|
||||||
|
)
|
|
@ -0,0 +1,84 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe.tasks.vision.face_geometry.proto;
|
||||||
|
|
||||||
|
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||||
|
option java_outer_classname = "EnvironmentProto";
|
||||||
|
|
||||||
|
// Defines the (0, 0) origin point location of the environment.
|
||||||
|
//
|
||||||
|
// The variation in the origin point location can be traced back to the memory
|
||||||
|
// layout of the camera video frame buffers.
|
||||||
|
//
|
||||||
|
// Usually, the memory layout for most CPU (and also some GPU) camera video
|
||||||
|
// frame buffers results in having the (0, 0) origin point located in the
|
||||||
|
// Top Left corner.
|
||||||
|
//
|
||||||
|
// On the contrary, the memory layout for most GPU camera video frame buffers
|
||||||
|
// results in having the (0, 0) origin point located in the Bottom Left corner.
|
||||||
|
//
|
||||||
|
// Let's consider the following example:
|
||||||
|
//
|
||||||
|
// (A) ---------------+
|
||||||
|
// ___ |
|
||||||
|
// | (1) | | |
|
||||||
|
// | / \ | | |
|
||||||
|
// | |---|===|-| |
|
||||||
|
// | |---| | | |
|
||||||
|
// | / \ | | |
|
||||||
|
// | | | | | |
|
||||||
|
// | | (2) |=| | |
|
||||||
|
// | | | | | |
|
||||||
|
// | |_______| |_| |
|
||||||
|
// | |@| |@| | | |
|
||||||
|
// | ___________|_|_ |
|
||||||
|
// |
|
||||||
|
// (B) ---------------+
|
||||||
|
//
|
||||||
|
// In this example, (1) and (2) have the same X coordinate regardless of the
|
||||||
|
// origin point location. However, having the origin point located at (A)
|
||||||
|
// (Top Left corner) results in (1) having a smaller Y coordinate if compared to
|
||||||
|
// (2). Similarly, having the origin point located at (B) (Bottom Left corner)
|
||||||
|
// results in (1) having a greater Y coordinate if compared to (2).
|
||||||
|
//
|
||||||
|
// Providing the correct origin point location for your environment and making
|
||||||
|
// sure all the input landmarks are in-sync with this location is crucial
|
||||||
|
// for receiving the correct output face geometry and visual renders.
|
||||||
|
enum OriginPointLocation {
|
||||||
|
BOTTOM_LEFT_CORNER = 1;
|
||||||
|
TOP_LEFT_CORNER = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The perspective camera is defined through its vertical FOV angle and the
|
||||||
|
// Z-clipping planes. The aspect ratio is a runtime variable for the face
|
||||||
|
// geometry module and should be provided alongside the face landmarks in order
|
||||||
|
// to estimate the face geometry on a given frame.
|
||||||
|
//
|
||||||
|
// More info on Perspective Cameras:
|
||||||
|
// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective
|
||||||
|
message PerspectiveCamera {
|
||||||
|
// `0 < vertical_fov_degrees < 180`.
|
||||||
|
optional float vertical_fov_degrees = 1;
|
||||||
|
// `0 < near < far`.
|
||||||
|
optional float near = 2;
|
||||||
|
optional float far = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Environment {
|
||||||
|
optional OriginPointLocation origin_point_location = 1;
|
||||||
|
optional PerspectiveCamera perspective_camera = 2;
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe.tasks.vision.face_geometry.proto;
|
||||||
|
|
||||||
|
import "mediapipe/framework/formats/matrix_data.proto";
|
||||||
|
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";
|
||||||
|
|
||||||
|
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||||
|
option java_outer_classname = "FaceGeometryProto";
|
||||||
|
|
||||||
|
// Defines the face geometry pipeline estimation result format.
|
||||||
|
message FaceGeometry {
|
||||||
|
// Defines a mesh surface for a face. The face mesh vertex IDs are the same as
|
||||||
|
// the face landmark IDs.
|
||||||
|
//
|
||||||
|
// XYZ coordinates exist in the right-handed Metric 3D space configured by an
|
||||||
|
// environment. UV coordinates are taken from the canonical face mesh model.
|
||||||
|
//
|
||||||
|
// XY coordinates are guaranteed to match the screen positions of
|
||||||
|
// the input face landmarks after (1) being multiplied by the face pose
|
||||||
|
// transformation matrix and then (2) being projected with a perspective
|
||||||
|
// camera matrix of the same environment.
|
||||||
|
//
|
||||||
|
// NOTE: the triangular topology of the face mesh is only useful when derived
|
||||||
|
// from the 468 face landmarks, not from the 6 face detection landmarks
|
||||||
|
// (keypoints). The latter don't cover the entire face and this mesh is
|
||||||
|
// defined here only to comply with the API. It should be considered as
|
||||||
|
// a placeholder and/or for debugging purposes.
|
||||||
|
//
|
||||||
|
// Use the face geometry derived from the face detection landmarks
|
||||||
|
// (keypoints) for the face pose transformation matrix, not the mesh.
|
||||||
|
optional Mesh3d mesh = 1;
|
||||||
|
|
||||||
|
// Defines a face pose transformation matrix, which provides mapping from
|
||||||
|
// the static canonical face model to the runtime face. Tries to distinguish
|
||||||
|
// a head pose change from a facial expression change and to only reflect the
|
||||||
|
// former.
|
||||||
|
//
|
||||||
|
// Is a 4x4 matrix and contains only the following components:
|
||||||
|
// * Uniform scale
|
||||||
|
// * Rotation
|
||||||
|
// * Translation
|
||||||
|
//
|
||||||
|
// The last row is guaranteed to be `[0 0 0 1]`.
|
||||||
|
optional mediapipe.MatrixData pose_transform_matrix = 2;
|
||||||
|
}
|
|
@ -0,0 +1,63 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe.tasks.vision.face_geometry.proto;
|
||||||
|
|
||||||
|
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";
|
||||||
|
|
||||||
|
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||||
|
option java_outer_classname = "GeometryPipelineMetadataProto";
|
||||||
|
|
||||||
|
enum InputSource {
|
||||||
|
DEFAULT = 0; // FACE_LANDMARK_PIPELINE
|
||||||
|
FACE_LANDMARK_PIPELINE = 1;
|
||||||
|
FACE_DETECTION_PIPELINE = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message WeightedLandmarkRef {
|
||||||
|
// Defines the landmark ID. References an existing face landmark ID.
|
||||||
|
optional uint32 landmark_id = 1;
|
||||||
|
// Defines the landmark weight. The larger the weight the more influence this
|
||||||
|
// landmark has in the basis.
|
||||||
|
//
|
||||||
|
// Is non-negative.
|
||||||
|
optional float weight = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next field ID: 4
|
||||||
|
message GeometryPipelineMetadata {
|
||||||
|
// Defines the source of the input landmarks to let the underlying geometry
|
||||||
|
// pipeline adjust in order to produce the best results.
|
||||||
|
//
|
||||||
|
// Face landmark pipeline is expected to produce 3D landmarks with relative Z
|
||||||
|
// coordinate, which is scaled as the X coordinate assuming the weak
|
||||||
|
// perspective projection camera model.
|
||||||
|
//
|
||||||
|
// Face detection pipeline is expected to produce 2D landmarks with Z
|
||||||
|
// coordinate being equal to 0.
|
||||||
|
optional InputSource input_source = 3;
|
||||||
|
// Defines a mesh surface for a canonical face. The canonical face mesh vertex
|
||||||
|
// IDs are the same as the face landmark IDs.
|
||||||
|
//
|
||||||
|
// XYZ coordinates are defined in centimeter units.
|
||||||
|
optional Mesh3d canonical_mesh = 1;
|
||||||
|
// Defines a weighted landmark basis for running the Procrustes solver
|
||||||
|
// algorithm inside the geometry pipeline.
|
||||||
|
//
|
||||||
|
// A good basis sets face landmark weights in a way to distinguish a head pose
|
||||||
|
// change from a facial expression change and to only respond to the former.
|
||||||
|
repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
|
||||||
|
}
|
41
mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto
Normal file
41
mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe.tasks.vision.face_geometry.proto;
|
||||||
|
|
||||||
|
option java_package = "mediapipe.tasks.vision.facegeometry.proto";
|
||||||
|
option java_outer_classname = "Mesh3dProto";
|
||||||
|
|
||||||
|
message Mesh3d {
|
||||||
|
enum VertexType {
|
||||||
|
// Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV).
|
||||||
|
VERTEX_PT = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum PrimitiveType {
|
||||||
|
// Is defined by 3 indices: triangle vertex IDs.
|
||||||
|
TRIANGLE = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
optional VertexType vertex_type = 1;
|
||||||
|
optional PrimitiveType primitive_type = 2;
|
||||||
|
// Vertex buffer size is a multiple of the vertex size (e.g., 5 for
|
||||||
|
// VERTEX_PT).
|
||||||
|
repeated float vertex_buffer = 3;
|
||||||
|
// Index buffer size is a multiple of the primitive size (e.g., 3 for
|
||||||
|
// TRIANGLE).
|
||||||
|
repeated uint32 index_buffer = 4;
|
||||||
|
}
|
108
mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD
Normal file
108
mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||||
|
|
||||||
|
mediapipe_proto_library(
|
||||||
|
name = "tensors_to_image_calculator_proto",
|
||||||
|
srcs = ["tensors_to_image_calculator.proto"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/framework:calculator_options_proto",
|
||||||
|
"//mediapipe/framework:calculator_proto",
|
||||||
|
"//mediapipe/gpu:gpu_origin_proto",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "tensors_to_image_calculator",
|
||||||
|
srcs = ["tensors_to_image_calculator.cc"],
|
||||||
|
copts = select({
|
||||||
|
"//mediapipe:apple": [
|
||||||
|
"-x objective-c++",
|
||||||
|
"-fobjc-arc", # enable reference-counting
|
||||||
|
],
|
||||||
|
"//conditions:default": [],
|
||||||
|
}),
|
||||||
|
features = ["-layering_check"], # allow depending on tensor_to_image_calculator_gpu_deps
|
||||||
|
linkopts = select({
|
||||||
|
"//mediapipe:apple": [
|
||||||
|
"-framework CoreVideo",
|
||||||
|
"-framework MetalKit",
|
||||||
|
],
|
||||||
|
"//conditions:default": [],
|
||||||
|
}),
|
||||||
|
deps = [
|
||||||
|
":tensors_to_image_calculator_cc_proto",
|
||||||
|
"@com_google_absl//absl/status",
|
||||||
|
"@com_google_absl//absl/strings",
|
||||||
|
"//mediapipe/framework:calculator_framework",
|
||||||
|
"//mediapipe/framework:calculator_options_cc_proto",
|
||||||
|
"//mediapipe/framework/api2:builder",
|
||||||
|
"//mediapipe/framework/api2:node",
|
||||||
|
"//mediapipe/framework/api2:packet",
|
||||||
|
"//mediapipe/framework/api2:port",
|
||||||
|
"//mediapipe/framework/formats:image",
|
||||||
|
"//mediapipe/framework/formats:tensor",
|
||||||
|
"//mediapipe/framework/port:logging",
|
||||||
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/framework/port:vector",
|
||||||
|
"//mediapipe/gpu:gpu_origin_cc_proto",
|
||||||
|
] + select({
|
||||||
|
"//mediapipe/gpu:disable_gpu": [],
|
||||||
|
"//conditions:default": ["tensor_to_image_calculator_gpu_deps"],
|
||||||
|
}),
|
||||||
|
alwayslink = 1,
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "tensor_to_image_calculator_gpu_deps",
|
||||||
|
visibility = ["//visibility:private"],
|
||||||
|
deps = select({
|
||||||
|
"//mediapipe:android": [
|
||||||
|
"//mediapipe/gpu:gl_calculator_helper",
|
||||||
|
"//mediapipe/gpu:gl_quad_renderer",
|
||||||
|
"//mediapipe/gpu:gl_simple_shaders",
|
||||||
|
"//mediapipe/gpu:gpu_buffer",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
|
||||||
|
],
|
||||||
|
"//mediapipe:ios": [
|
||||||
|
"//mediapipe/gpu:MPPMetalHelper",
|
||||||
|
"//mediapipe/gpu:MPPMetalUtil",
|
||||||
|
"//mediapipe/gpu:gl_calculator_helper",
|
||||||
|
"//mediapipe/gpu:gpu_buffer",
|
||||||
|
],
|
||||||
|
"//mediapipe:macos": [],
|
||||||
|
"//conditions:default": [
|
||||||
|
"//mediapipe/gpu:gl_calculator_helper",
|
||||||
|
"//mediapipe/gpu:gl_quad_renderer",
|
||||||
|
"//mediapipe/gpu:gpu_buffer",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
|
||||||
|
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
)
|
|
@ -0,0 +1,439 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/status/status.h"
|
||||||
|
#include "absl/strings/str_cat.h"
|
||||||
|
#include "mediapipe/framework/api2/node.h"
|
||||||
|
#include "mediapipe/framework/api2/packet.h"
|
||||||
|
#include "mediapipe/framework/api2/port.h"
|
||||||
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
|
#include "mediapipe/framework/calculator_options.pb.h"
|
||||||
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/framework/formats/tensor.h"
|
||||||
|
#include "mediapipe/framework/port/logging.h"
|
||||||
|
#include "mediapipe/framework/port/status.h"
|
||||||
|
#include "mediapipe/gpu/gpu_origin.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"
|
||||||
|
|
||||||
|
#if !MEDIAPIPE_DISABLE_GPU
|
||||||
|
#include "mediapipe/gpu/gpu_buffer.h"
|
||||||
|
#if MEDIAPIPE_METAL_ENABLED
|
||||||
|
#import <CoreVideo/CoreVideo.h>
|
||||||
|
#import <Metal/Metal.h>
|
||||||
|
#import <MetalKit/MetalKit.h>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
|
||||||
|
#import "mediapipe/gpu/MPPMetalHelper.h"
|
||||||
|
#else
|
||||||
|
#include "mediapipe/gpu/gl_calculator_helper.h"
|
||||||
|
#include "mediapipe/gpu/gl_quad_renderer.h"
|
||||||
|
#include "mediapipe/gpu/gl_simple_shaders.h"
|
||||||
|
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl/gl_shader.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
|
||||||
|
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
#endif // MEDIAPIPE_METAL_ENABLED
|
||||||
|
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||||
|
|
||||||
|
namespace mediapipe {
|
||||||
|
namespace tasks {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using ::mediapipe::api2::Input;
|
||||||
|
using ::mediapipe::api2::Node;
|
||||||
|
using ::mediapipe::api2::Output;
|
||||||
|
|
||||||
|
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
using ::tflite::gpu::gl::GlProgram;
|
||||||
|
using ::tflite::gpu::gl::GlShader;
|
||||||
|
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
|
||||||
|
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
|
||||||
|
|
||||||
|
// Returns how many groups of `group_size` items are needed to hold `size`
// items, i.e. `size / group_size` rounded up for positive inputs. Commonly
// used to compute the number of blocks to launch in a kernel.
static int NumGroups(const int size, const int group_size) {  // NOLINT
  // Adding `group_size - 1` before the truncating division makes any partial
  // trailing group count as a full one.
  const int padded_size = size + group_size - 1;
  return padded_size / group_size;
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Converts a MediaPipe tensor to a MediaPipe Image.
|
||||||
|
//
|
||||||
|
// Input streams:
|
||||||
|
// TENSORS - std::vector<mediapipe::Tensor> that only contains one element.
|
||||||
|
//
|
||||||
|
// Output streams:
|
||||||
|
// IMAGE - mediapipe::Image.
|
||||||
|
//
|
||||||
|
// TODO: Enable TensorsToImageCalculator to run on CPU.
|
||||||
|
class TensorsToImageCalculator : public Node {
|
||||||
|
public:
|
||||||
|
static constexpr Input<std::vector<Tensor>> kInputTensors{"TENSORS"};
|
||||||
|
static constexpr Output<Image> kOutputImage{"IMAGE"};
|
||||||
|
|
||||||
|
MEDIAPIPE_NODE_CONTRACT(kInputTensors, kOutputImage);
|
||||||
|
|
||||||
|
static absl::Status UpdateContract(CalculatorContract* cc);
|
||||||
|
absl::Status Open(CalculatorContext* cc);
|
||||||
|
absl::Status Process(CalculatorContext* cc);
|
||||||
|
absl::Status Close(CalculatorContext* cc);
|
||||||
|
|
||||||
|
private:
|
||||||
|
#if !MEDIAPIPE_DISABLE_GPU
|
||||||
|
#if MEDIAPIPE_METAL_ENABLED
|
||||||
|
bool metal_initialized_ = false;
|
||||||
|
MPPMetalHelper* gpu_helper_ = nullptr;
|
||||||
|
id<MTLComputePipelineState> to_buffer_program_;
|
||||||
|
|
||||||
|
absl::Status MetalSetup(CalculatorContext* cc);
|
||||||
|
absl::Status MetalProcess(CalculatorContext* cc);
|
||||||
|
#else
|
||||||
|
absl::Status GlSetup(CalculatorContext* cc);
|
||||||
|
|
||||||
|
GlCalculatorHelper gl_helper_;
|
||||||
|
|
||||||
|
bool gl_initialized_ = false;
|
||||||
|
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
std::unique_ptr<tflite::gpu::gl::GlProgram> gl_compute_program_;
|
||||||
|
const tflite::gpu::uint3 workgroup_size_ = {8, 8, 1};
|
||||||
|
#else
|
||||||
|
GLuint program_ = 0;
|
||||||
|
std::unique_ptr<mediapipe::QuadRenderer> gl_renderer_;
|
||||||
|
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
#endif // MEDIAPIPE_METAL_ENABLED
|
||||||
|
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||||
|
};
|
||||||
|
MEDIAPIPE_REGISTER_NODE(::mediapipe::tasks::TensorsToImageCalculator);
|
||||||
|
|
||||||
|
// Forwards the calculator contract to the GPU helper of the active backend.
//
// Metal builds hand `cc` to MPPMetalHelper, other GPU builds hand it to
// GlCalculatorHelper, and builds with GPU support disabled leave the contract
// untouched and report success.
absl::Status TensorsToImageCalculator::UpdateContract(CalculatorContract* cc) {
#if !MEDIAPIPE_DISABLE_GPU && MEDIAPIPE_METAL_ENABLED
  // Metal: propagate a helper failure, otherwise succeed.
  MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
  return absl::OkStatus();
#elif !MEDIAPIPE_DISABLE_GPU
  // OpenGL: the helper's status is the result.
  return GlCalculatorHelper::UpdateContract(cc);
#else
  // GPU support compiled out: nothing to declare.
  return absl::OkStatus();
#endif  // !MEDIAPIPE_DISABLE_GPU && MEDIAPIPE_METAL_ENABLED
}
|
||||||
|
|
||||||
|
// Prepares the GPU helper for the active backend.
//
// Metal builds create the MPPMetalHelper and fail if that yields nil; other
// GPU builds open the GlCalculatorHelper. With GPU support disabled there is
// nothing to prepare.
absl::Status TensorsToImageCalculator::Open(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU && MEDIAPIPE_METAL_ENABLED
  gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
  RET_CHECK(gpu_helper_);
#elif !MEDIAPIPE_DISABLE_GPU
  MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU && MEDIAPIPE_METAL_ENABLED

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Converts the single input tensor into an Image and sends it on the IMAGE
// port.
//
// Metal builds delegate to MetalProcess(). Other GPU builds run inside the GL
// context: GL state is set up lazily on the first call, an empty input packet
// is skipped without producing output, and the tensor is then written into a
// texture either by a compute shader (OpenGL ES 3.1+) or by a quad render.
// With GPU support disabled the call is a no-op that reports success.
//
// Fails if the input vector does not hold exactly one tensor, if that tensor
// has fewer than three dimensions, or if any GL step reports an error.
absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED

  return MetalProcess(cc);

#else

  return gl_helper_.RunInGlContext([this, cc]() -> absl::Status {
    // One-time GL setup; done here because it needs the GL context.
    if (!gl_initialized_) {
      MP_RETURN_IF_ERROR(GlSetup(cc));
      gl_initialized_ = true;
    }

    if (kInputTensors(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    const auto& input_tensors = kInputTensors(cc).Get();
    RET_CHECK_EQ(input_tensors.size(), 1)
        << "Expect 1 input tensor, but have " << input_tensors.size();
    // dims[1] and dims[2] are read below, so reject tensors of lower rank
    // instead of indexing out of bounds.
    const auto& dims = input_tensors[0].shape().dims;
    RET_CHECK_GE(dims.size(), 3)
        << "Expect an input tensor with at least 3 dimensions, but have "
        << dims.size();
    const int tensor_width = dims[2];
    const int tensor_height = dims[1];

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

    auto out_texture = std::make_unique<tflite::gpu::gl::GlTexture>();
    MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
        tflite::gpu::DataType::UINT8,  // GL_RGBA8
        {tensor_width, tensor_height}, out_texture.get()));

    const int output_index = 0;
    glBindImageTexture(output_index, out_texture->id(), 0, GL_FALSE, 0,
                       GL_WRITE_ONLY, GL_RGBA8);

    auto read_view = input_tensors[0].GetOpenGlBufferReadView();
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, read_view.name());

    const tflite::gpu::uint3 workload = {tensor_width, tensor_height, 1};
    const tflite::gpu::uint3 workgroups =
        tflite::gpu::DivideRoundUp(workload, workgroup_size_);

    glUseProgram(gl_compute_program_->id());
    glUniform2i(glGetUniformLocation(gl_compute_program_->id(), "out_size"),
                tensor_width, tensor_height);

    MP_RETURN_IF_ERROR(gl_compute_program_->Dispatch(workgroups));

    // Read the texture's target and id BEFORE ownership is released into the
    // deleter below. Function arguments are evaluated in an unspecified order,
    // so calling out_texture->target() in the same argument list as
    // out_texture.release() could dereference an already-emptied pointer.
    const auto texture_target = out_texture->target();
    const auto texture_id = out_texture->id();
    auto texture_buffer = mediapipe::GlTextureBuffer::Wrap(
        texture_target, texture_id, tensor_width, tensor_height,
        mediapipe::GpuBufferFormat::kBGRA32,
        // The deleter owns the GlTexture and frees it when invoked.
        [ptr = out_texture.release()](
            std::shared_ptr<mediapipe::GlSyncPoint> sync_token) mutable {
          delete ptr;
        });

    auto output =
        std::make_unique<mediapipe::GpuBuffer>(std::move(texture_buffer));
    kOutputImage(cc).Send(Image(*output));

#else

    // NOTE(review): requesting a CPU read view when the tensor is not yet
    // available as a 2D texture presumably prepares it for the texture read
    // view taken below - confirm against the Tensor implementation.
    if (!input_tensors[0].ready_as_opengl_texture_2d()) {
      (void)input_tensors[0].GetCpuReadView();
    }

    auto output_texture =
        gl_helper_.CreateDestinationTexture(tensor_width, tensor_height);
    gl_helper_.BindFramebuffer(output_texture);  // GL_TEXTURE0
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D,
                  input_tensors[0].GetOpenGlTexture2dReadView().name());

    MP_RETURN_IF_ERROR(gl_renderer_->GlRender(
        tensor_width, tensor_height, output_texture.width(),
        output_texture.height(), mediapipe::FrameScaleMode::kStretch,
        mediapipe::FrameRotation::kNone,
        /*flip_horizontal=*/false, /*flip_vertical=*/false,
        /*flip_texture=*/false));

    // Unbind the input texture from unit 1 again.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);

    auto output = output_texture.GetFrame<GpuBuffer>();
    kOutputImage(cc).Send(Image(*output));

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

    return absl::OkStatus();
  });

#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Releases the OpenGL resources held by this calculator.
//
// Only does work on OpenGL builds (GPU enabled, Metal disabled); on every
// other configuration it is a no-op. The cleanup is executed through
// gl_helper_.RunInGlContext(), the same way the GL objects are used elsewhere
// in this calculator.
//
// Args:
//   cc: calculator context (unused here).
// Returns:
//   Always absl::OkStatus().
absl::Status TensorsToImageCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
  gl_helper_.RunInGlContext([this] {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
    // Compute-shader path: dropping the unique_ptr destroys the GlProgram.
    gl_compute_program_.reset();
#else
    if (program_) glDeleteProgram(program_);
    program_ = 0;
    // Fragment-shader path: GlSetup() creates a QuadRenderer whose GL objects
    // are only released by an explicit GlTeardown(), so tear it down here
    // instead of leaking them when the calculator is closed.
    if (gl_renderer_) {
      gl_renderer_->GlTeardown();
      gl_renderer_.reset();
    }
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
  });
#endif  // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
#if MEDIAPIPE_METAL_ENABLED
|
||||||
|
|
||||||
|
// Converts the single input tensor into an output Image using a Metal compute
// kernel.
//
// The Metal pipeline is created lazily on the first call (see MetalSetup()).
// If the input stream is empty at this timestamp nothing is emitted.
//
// Args:
//   cc: calculator context providing the input tensors and the output stream.
// Returns:
//   OkStatus on success; an error if Metal setup fails, if the packet does not
//   hold exactly one tensor, or if that tensor has fewer than 3 dimensions.
absl::Status TensorsToImageCalculator::MetalProcess(CalculatorContext* cc) {
  if (!metal_initialized_) {
    MP_RETURN_IF_ERROR(MetalSetup(cc));
    metal_initialized_ = true;
  }

  if (kInputTensors(cc).IsEmpty()) {
    return absl::OkStatus();
  }
  const auto& input_tensors = kInputTensors(cc).Get();
  RET_CHECK_EQ(input_tensors.size(), 1)
      << "Expect 1 input tensor, but have " << input_tensors.size();

  // dims[1] and dims[2] are read below, so reject tensors of lower rank
  // instead of indexing past the end of the shape.
  const auto& dims = input_tensors[0].shape().dims;
  RET_CHECK_GE(dims.size(), 3)
      << "Expect input tensor with at least 3 dimensions, but have "
      << dims.size();
  const int tensor_width = dims[2];
  const int tensor_height = dims[1];

  id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
  command_buffer.label = @"TensorsToImageCalculatorConvert";
  id<MTLComputeCommandEncoder> compute_encoder =
      [command_buffer computeCommandEncoder];
  [compute_encoder setComputePipelineState:to_buffer_program_];

  // Buffer slot 0 / texture slot 1 match the [[ buffer(0) ]] and
  // [[ texture(1) ]] bindings of convertKernel compiled in MetalSetup().
  auto input_view =
      mediapipe::MtlBufferView::GetReadView(input_tensors[0], command_buffer);
  [compute_encoder setBuffer:input_view.buffer() offset:0 atIndex:0];

  mediapipe::GpuBuffer output =
      [gpu_helper_ mediapipeGpuBufferWithWidth:tensor_width
                                        height:tensor_height];
  id<MTLTexture> dst_texture = [gpu_helper_ metalTextureWithGpuBuffer:output];
  [compute_encoder setTexture:dst_texture atIndex:1];

  // 8x8 threads per group; NumGroups() presumably rounds up so the grid
  // covers the whole image (the kernel itself bounds-checks each thread).
  MTLSize threads_per_group = MTLSizeMake(8, 8, 1);
  MTLSize threadgroups =
      MTLSizeMake(NumGroups(tensor_width, 8), NumGroups(tensor_height, 8), 1);
  [compute_encoder dispatchThreadgroups:threadgroups
                  threadsPerThreadgroup:threads_per_group];
  [compute_encoder endEncoding];
  [command_buffer commit];

  kOutputImage(cc).Send(Image(output));
  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Compiles the Metal compute kernel that copies a float tensor into a texture
// and stores the resulting pipeline state in to_buffer_program_.
//
// The kernel reads three consecutive floats per pixel from the input buffer
// and writes them as the first three components of the output texel, with the
// fourth component fixed to 1.0.
//
// Args:
//   cc: calculator context (unused here).
// Returns:
//   OkStatus on success; an error if the shader library, the kernel function
//   or the compute pipeline state cannot be created.
absl::Status TensorsToImageCalculator::MetalSetup(CalculatorContext* cc) {
  id<MTLDevice> device = gpu_helper_.mtlDevice;
  // The output texture is only ever written, so it is declared access::write;
  // read_write access is subject to per-device/per-format restrictions that
  // this kernel does not need.
  const std::string shader_source =
      R"(
  #include <metal_stdlib>

  using namespace metal;

  kernel void convertKernel(
      device float*                         in_buf   [[ buffer(0) ]],
      texture2d<float, access::write>       out_tex  [[ texture(1) ]],
      uint2                                 gid      [[ thread_position_in_grid ]]) {
    if (gid.x >= out_tex.get_width() || gid.y >= out_tex.get_height()) return;
    uint linear_index = 3 * (gid.y * out_tex.get_width() + gid.x);
    float4 out_value = float4(in_buf[linear_index], in_buf[linear_index + 1], in_buf[linear_index + 2], 1.0);
    out_tex.write(out_value, gid);
  }
  )";
  NSString* library_source =
      [NSString stringWithUTF8String:shader_source.c_str()];
  NSError* error = nil;
  id<MTLLibrary> library =
      [device newLibraryWithSource:library_source options:nil error:&error];
  RET_CHECK(library != nil) << "Couldn't create shader library "
                            << [[error localizedDescription] UTF8String];
  id<MTLFunction> kernel_func = nil;
  kernel_func = [library newFunctionWithName:@"convertKernel"];
  RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";
  to_buffer_program_ =
      [device newComputePipelineStateWithFunction:kernel_func error:&error];
  RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
      [[error localizedDescription] UTF8String];

  return mediapipe::OkStatus();
}
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_METAL_ENABLED
|
||||||
|
|
||||||
|
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
|
||||||
|
// Creates the OpenGL program that converts the input tensor into an image.
//
// Two implementations are selected at compile time:
//   * OpenGL ES 3.1 and above: a compute shader that reads the tensor from a
//     shader storage buffer and stores RGBA texels into an image.
//   * Otherwise: a fragment shader driven by a QuadRenderer that samples the
//     tensor as a 2D texture.
// On non-Apple platforms, when the configured gpu_origin is anything other
// than TOP_LEFT, FLIP_Y_COORD is defined in the shader so the output is
// flipped vertically.
//
// Args:
//   cc: calculator context, used to read TensorsToImageCalculatorOptions.
// Returns:
//   OkStatus on success, or the error reported while compiling/linking the
//   shader or setting up the renderer.
absl::Status TensorsToImageCalculator::GlSetup(CalculatorContext* cc) {
  // Snippet spliced into the shader source; stays empty when no flip is needed.
  std::string flip_y_define;
#if !defined(__APPLE__)
  if (cc->Options<TensorsToImageCalculatorOptions>().gpu_origin() !=
      mediapipe::GpuOrigin::TOP_LEFT) {
    flip_y_define = R"(
      #define FLIP_Y_COORD
    )";
  }
#endif  // !defined(__APPLE__)

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  // Header (version/workgroup + declarations), optional flip define, then the
  // kernel body, concatenated in that order.
  const std::string compute_source = absl::StrCat(
      tflite::gpu::gl::GetShaderHeader(workgroup_size_), R"(
    precision highp float;
    layout(rgba8, binding = 0) writeonly uniform highp image2D output_texture;
    uniform ivec2 out_size;
  )",
      flip_y_define, R"(
    layout(std430, binding = 2) readonly buffer B0 {
      float elements[];
    } input_data;   // data tensor

    void main() {
      int out_width = out_size.x;
      int out_height = out_size.y;

      ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
      if (gid.x >= out_width || gid.y >= out_height) { return; }
      int linear_index = 3 * (gid.y * out_width + gid.x);

#ifdef FLIP_Y_COORD
      int y_coord = out_height - gid.y - 1;
#else
      int y_coord = gid.y;
#endif  // defined(FLIP_Y_COORD)

      ivec2 out_coordinate = ivec2(gid.x, y_coord);
      vec4 out_value = vec4(input_data.elements[linear_index], input_data.elements[linear_index + 1], input_data.elements[linear_index + 2], 1.0);
      imageStore(output_texture, out_coordinate, out_value);
    })");

  GlShader compute_shader;
  MP_RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER, compute_source,
                                             &compute_shader));
  gl_compute_program_ = std::make_unique<GlProgram>();
  MP_RETURN_IF_ERROR(
      GlProgram::CreateWithShader(compute_shader, gl_compute_program_.get()));

#else

  // Maps fragColor onto gl_FragColor on GL ES, or declares it as an explicit
  // output elsewhere.
  constexpr GLchar kFragColorDecl[] = R"(
  #ifdef GL_ES
  #define fragColor gl_FragColor
  #else
  out vec4 fragColor;
  #endif  // defined(GL_ES);
)";

  // Samples the tensor texture (optionally flipped in Y) and forces alpha 1.0.
  constexpr GLchar kFragmentMain[] = R"(
  DEFAULT_PRECISION(mediump, float)
  in vec2 sample_coordinate;
  uniform sampler2D tensor;
  void main() {
#ifdef FLIP_Y_COORD
    float y_coord = 1.0 - sample_coordinate.y;
#else
    float y_coord = sample_coordinate.y;
#endif  // defined(FLIP_Y_COORD)
    vec3 color = texture2D(tensor, vec2(sample_coordinate.x, y_coord)).rgb;
    fragColor = vec4(color, 1.0);
  }
  )";

  const std::string fragment_source =
      absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble, kFragColorDecl,
                   flip_y_define, kFragmentMain);
  gl_renderer_ = std::make_unique<mediapipe::QuadRenderer>();
  MP_RETURN_IF_ERROR(
      gl_renderer_->GlSetup(fragment_source.c_str(), {"tensor"}));

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  return mediapipe::OkStatus();
}
|
||||||
|
|
||||||
|
#endif // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
|
||||||
|
|
||||||
|
} // namespace tasks
|
||||||
|
} // namespace mediapipe
|
|
@ -0,0 +1,31 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
|
||||||
|
package mediapipe.tasks;
|
||||||
|
|
||||||
|
import "mediapipe/framework/calculator.proto";
|
||||||
|
import "mediapipe/gpu/gpu_origin.proto";
|
||||||
|
|
||||||
|
message TensorsToImageCalculatorOptions {
|
||||||
|
extend mediapipe.CalculatorOptions {
|
||||||
|
optional TensorsToImageCalculatorOptions ext = 511831156;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For CONVENTIONAL mode for OpenGL, the output image starts at the bottom,
// so the tensor data (which is expected to start at the top) is flipped
// vertically when it is written to the output image.
// (DEFAULT or unset interpreted as CONVENTIONAL.)
|
||||||
|
optional mediapipe.GpuOrigin.Mode gpu_origin = 1;
|
||||||
|
}
|
|
@ -203,106 +203,111 @@ INSTANTIATE_TEST_CASE_P(
|
||||||
CombinedPredictionCalculatorTests, CombinedPredictionCalculatorTest,
|
CombinedPredictionCalculatorTests, CombinedPredictionCalculatorTest,
|
||||||
testing::ValuesIn<CombinedPredictionCalculatorTestCase>({
|
testing::ValuesIn<CombinedPredictionCalculatorTestCase>({
|
||||||
{
|
{
|
||||||
.test_name = "TestCustomDramaWinnnerWith_HighCanned_Thresh",
|
/* test_name= */ "TestCustomDramaWinnnerWith_HighCanned_Thresh",
|
||||||
.custom_negative_score = 0.1,
|
/* custom_negative_score= */ 0.1,
|
||||||
.drama_score = 0.5,
|
/* drama_score= */ 0.5,
|
||||||
.llama_score = 0.3,
|
/* llama_score= */ 0.3,
|
||||||
.drama_thresh = 0.25,
|
/* drama_thresh= */ 0.25,
|
||||||
.llama_thresh = 0.7,
|
/* llama_thresh= */ 0.7,
|
||||||
.canned_negative_score = 0.1,
|
/* canned_negative_score= */ 0.1,
|
||||||
.bazinga_score = 0.3,
|
/* bazinga_score= */ 0.3,
|
||||||
.joy_score = 0.3,
|
/* joy_score= */ 0.3,
|
||||||
.peace_score = 0.3,
|
/* peace_score= */ 0.3,
|
||||||
.bazinga_thresh = 0.7,
|
/* bazinga_thresh= */ 0.7,
|
||||||
.joy_thresh = 0.7,
|
/* joy_thresh= */ 0.7,
|
||||||
.peace_thresh = 0.7,
|
/* peace_thresh= */ 0.7,
|
||||||
.max_scoring_label = "CustomDrama",
|
/* max_scoring_label= */ "CustomDrama",
|
||||||
.max_score = 0.5,
|
/* max_score= */ 0.5,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.test_name = "TestCannedWinnerWith_HighCustom_ZeroCanned_Thresh",
|
/* test_name= */ "TestCannedWinnerWith_HighCustom_ZeroCanned_"
|
||||||
.custom_negative_score = 0.1,
|
"Thresh",
|
||||||
.drama_score = 0.3,
|
/* custom_negative_score= */ 0.1,
|
||||||
.llama_score = 0.6,
|
/* drama_score= */ 0.3,
|
||||||
.drama_thresh = 0.4,
|
/* llama_score= */ 0.6,
|
||||||
.llama_thresh = 0.8,
|
/* drama_thresh= */ 0.4,
|
||||||
.canned_negative_score = 0.1,
|
/* llama_thresh= */ 0.8,
|
||||||
.bazinga_score = 0.4,
|
/* canned_negative_score= */ 0.1,
|
||||||
.joy_score = 0.3,
|
/* bazinga_score= */ 0.4,
|
||||||
.peace_score = 0.2,
|
/* joy_score= */ 0.3,
|
||||||
.bazinga_thresh = 0.0,
|
/* peace_score= */ 0.2,
|
||||||
.joy_thresh = 0.0,
|
/* bazinga_thresh= */ 0.0,
|
||||||
.peace_thresh = 0.0,
|
/* joy_thresh= */ 0.0,
|
||||||
.max_scoring_label = "CannedBazinga",
|
/* peace_thresh= */ 0.0,
|
||||||
.max_score = 0.4,
|
/* max_scoring_label= */ "CannedBazinga",
|
||||||
|
/* max_score= */ 0.4,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.test_name = "TestNegativeWinnerWith_LowCustom_HighCanned_Thresh",
|
/* test_name= */ "TestNegativeWinnerWith_LowCustom_HighCanned_"
|
||||||
.custom_negative_score = 0.5,
|
"Thresh",
|
||||||
.drama_score = 0.1,
|
/* custom_negative_score= */ 0.5,
|
||||||
.llama_score = 0.4,
|
/* drama_score= */ 0.1,
|
||||||
.drama_thresh = 0.1,
|
/* llama_score= */ 0.4,
|
||||||
.llama_thresh = 0.05,
|
/* drama_thresh= */ 0.1,
|
||||||
.canned_negative_score = 0.1,
|
/* llama_thresh= */ 0.05,
|
||||||
.bazinga_score = 0.3,
|
/* canned_negative_score= */ 0.1,
|
||||||
.joy_score = 0.3,
|
/* bazinga_score= */ 0.3,
|
||||||
.peace_score = 0.3,
|
/* joy_score= */ 0.3,
|
||||||
.bazinga_thresh = 0.7,
|
/* peace_score= */ 0.3,
|
||||||
.joy_thresh = 0.7,
|
/* bazinga_thresh= */ 0.7,
|
||||||
.peace_thresh = 0.7,
|
/* joy_thresh= */ 0.7,
|
||||||
.max_scoring_label = "Negative",
|
/* peace_thresh= */ 0.7,
|
||||||
.max_score = 0.5,
|
/* max_scoring_label= */ "Negative",
|
||||||
|
/* max_score= */ 0.5,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh",
|
/* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
|
||||||
.custom_negative_score = 0.8,
|
"Thresh",
|
||||||
.drama_score = 0.1,
|
/* custom_negative_score= */ 0.8,
|
||||||
.llama_score = 0.1,
|
/* drama_score= */ 0.1,
|
||||||
.drama_thresh = 0.25,
|
/* llama_score= */ 0.1,
|
||||||
.llama_thresh = 0.7,
|
/* drama_thresh= */ 0.25,
|
||||||
.canned_negative_score = 0.1,
|
/* llama_thresh= */ 0.7,
|
||||||
.bazinga_score = 0.3,
|
/* canned_negative_score= */ 0.1,
|
||||||
.joy_score = 0.3,
|
/* bazinga_score= */ 0.3,
|
||||||
.peace_score = 0.3,
|
/* joy_score= */ 0.3,
|
||||||
.bazinga_thresh = 0.7,
|
/* peace_score= */ 0.3,
|
||||||
.joy_thresh = 0.7,
|
/* bazinga_thresh= */ 0.7,
|
||||||
.peace_thresh = 0.7,
|
/* joy_thresh= */ 0.7,
|
||||||
.max_scoring_label = "Negative",
|
/* peace_thresh= */ 0.7,
|
||||||
.max_score = 0.8,
|
/* max_scoring_label= */ "Negative",
|
||||||
|
/* max_score= */ 0.8,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.test_name = "TestNegativeWinnerWith_HighCustom_HighCannedThresh2",
|
/* test_name= */ "TestNegativeWinnerWith_HighCustom_"
|
||||||
.custom_negative_score = 0.1,
|
"HighCannedThresh2",
|
||||||
.drama_score = 0.2,
|
/* custom_negative_score= */ 0.1,
|
||||||
.llama_score = 0.7,
|
/* drama_score= */ 0.2,
|
||||||
.drama_thresh = 1.1,
|
/* llama_score= */ 0.7,
|
||||||
.llama_thresh = 1.1,
|
/* drama_thresh= */ 1.1,
|
||||||
.canned_negative_score = 0.1,
|
/* llama_thresh= */ 1.1,
|
||||||
.bazinga_score = 0.3,
|
/* canned_negative_score= */ 0.1,
|
||||||
.joy_score = 0.3,
|
/* bazinga_score= */ 0.3,
|
||||||
.peace_score = 0.3,
|
/* joy_score= */ 0.3,
|
||||||
.bazinga_thresh = 0.7,
|
/* peace_score= */ 0.3,
|
||||||
.joy_thresh = 0.7,
|
/* bazinga_thresh= */ 0.7,
|
||||||
.peace_thresh = 0.7,
|
/* joy_thresh= */ 0.7,
|
||||||
.max_scoring_label = "Negative",
|
/* peace_thresh= */ 0.7,
|
||||||
.max_score = 0.1,
|
/* max_scoring_label= */ "Negative",
|
||||||
|
/* max_score= */ 0.1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh3",
|
/* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
|
||||||
.custom_negative_score = 0.1,
|
"Thresh3",
|
||||||
.drama_score = 0.3,
|
/* custom_negative_score= */ 0.1,
|
||||||
.llama_score = 0.6,
|
/* drama_score= */ 0.3,
|
||||||
.drama_thresh = 0.4,
|
/* llama_score= */ 0.6,
|
||||||
.llama_thresh = 0.8,
|
/* drama_thresh= */ 0.4,
|
||||||
.canned_negative_score = 0.3,
|
/* llama_thresh= */ 0.8,
|
||||||
.bazinga_score = 0.2,
|
/* canned_negative_score= */ 0.3,
|
||||||
.joy_score = 0.3,
|
/* bazinga_score= */ 0.2,
|
||||||
.peace_score = 0.2,
|
/* joy_score= */ 0.3,
|
||||||
.bazinga_thresh = 0.5,
|
/* peace_score= */ 0.2,
|
||||||
.joy_thresh = 0.5,
|
/* bazinga_thresh= */ 0.5,
|
||||||
.peace_thresh = 0.5,
|
/* joy_thresh= */ 0.5,
|
||||||
.max_scoring_label = "Negative",
|
/* peace_thresh= */ 0.5,
|
||||||
.max_score = 0.1,
|
/* max_scoring_label= */ "Negative",
|
||||||
|
/* max_score= */ 0.1,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
[](const testing::TestParamInfo<
|
[](const testing::TestParamInfo<
|
||||||
|
|
|
@ -117,24 +117,24 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
LandmarksToMatrixCalculatorTests, Landmarks2dToMatrixCalculatorTest,
|
LandmarksToMatrixCalculatorTests, Landmarks2dToMatrixCalculatorTest,
|
||||||
testing::ValuesIn<Landmarks2dToMatrixCalculatorTestCase>(
|
testing::ValuesIn<Landmarks2dToMatrixCalculatorTestCase>(
|
||||||
{{.test_name = "TestWithOffset0",
|
{{/* test_name= */ "TestWithOffset0",
|
||||||
.base_offset = 0,
|
/* base_offset= */ 0,
|
||||||
.object_normalization_origin_offset = 0,
|
/* object_normalization_origin_offset= */ 0,
|
||||||
.expected_cell_0_2 = 0.1f,
|
/* expected_cell_0_2= */ 0.1f,
|
||||||
.expected_cell_1_5 = 0.1875f,
|
/* expected_cell_1_5= */ 0.1875f,
|
||||||
.rotation = 0},
|
/* rotation= */ 0},
|
||||||
{.test_name = "TestWithOffset21",
|
{/* test_name= */ "TestWithOffset21",
|
||||||
.base_offset = 21,
|
/* base_offset= */ 21,
|
||||||
.object_normalization_origin_offset = 0,
|
/* object_normalization_origin_offset= */ 0,
|
||||||
.expected_cell_0_2 = 0.1f,
|
/* expected_cell_0_2= */ 0.1f,
|
||||||
.expected_cell_1_5 = 0.1875f,
|
/* expected_cell_1_5= */ 0.1875f,
|
||||||
.rotation = 0},
|
/* rotation= */ 0},
|
||||||
{.test_name = "TestWithRotation",
|
{/* test_name= */ "TestWithRotation",
|
||||||
.base_offset = 0,
|
/* base_offset= */ 0,
|
||||||
.object_normalization_origin_offset = 0,
|
/* object_normalization_origin_offset= */ 0,
|
||||||
.expected_cell_0_2 = 0.075f,
|
/* expected_cell_0_2= */ 0.075f,
|
||||||
.expected_cell_1_5 = -0.25f,
|
/* expected_cell_1_5= */ -0.25f,
|
||||||
.rotation = M_PI / 2.0}}),
|
/* rotation= */ M_PI / 2.0}}),
|
||||||
[](const testing::TestParamInfo<
|
[](const testing::TestParamInfo<
|
||||||
Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
|
Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
|
||||||
return info.param.test_name;
|
return info.param.test_name;
|
||||||
|
@ -203,30 +203,30 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
LandmarksToMatrixCalculatorTests, LandmarksWorld3dToMatrixCalculatorTest,
|
LandmarksToMatrixCalculatorTests, LandmarksWorld3dToMatrixCalculatorTest,
|
||||||
testing::ValuesIn<LandmarksWorld3dToMatrixCalculatorTestCase>(
|
testing::ValuesIn<LandmarksWorld3dToMatrixCalculatorTestCase>(
|
||||||
{{.test_name = "TestWithOffset0",
|
{{/* test_name= */ "TestWithOffset0",
|
||||||
.base_offset = 0,
|
/* base_offset= */ 0,
|
||||||
.object_normalization_origin_offset = 0,
|
/* object_normalization_origin_offset= */ 0,
|
||||||
.expected_cell_0_2 = 0.1f,
|
/* expected_cell_0_2= */ 0.1f,
|
||||||
.expected_cell_1_5 = 0.25,
|
/* expected_cell_1_5= */ 0.25,
|
||||||
.rotation = 0},
|
/* rotation= */ 0},
|
||||||
{.test_name = "TestWithOffset21",
|
{/* test_name= */ "TestWithOffset21",
|
||||||
.base_offset = 21,
|
/* base_offset= */ 21,
|
||||||
.object_normalization_origin_offset = 0,
|
/* object_normalization_origin_offset= */ 0,
|
||||||
.expected_cell_0_2 = 0.1f,
|
/* expected_cell_0_2= */ 0.1f,
|
||||||
.expected_cell_1_5 = 0.25,
|
/* expected_cell_1_5= */ 0.25,
|
||||||
.rotation = 0},
|
/* rotation= */ 0},
|
||||||
{.test_name = "NoObjectNormalization",
|
{/* test_name= */ "NoObjectNormalization",
|
||||||
.base_offset = 0,
|
/* base_offset= */ 0,
|
||||||
.object_normalization_origin_offset = -1,
|
/* object_normalization_origin_offset= */ -1,
|
||||||
.expected_cell_0_2 = 0.021f,
|
/* expected_cell_0_2= */ 0.021f,
|
||||||
.expected_cell_1_5 = 0.052f,
|
/* expected_cell_1_5= */ 0.052f,
|
||||||
.rotation = 0},
|
/* rotation= */ 0},
|
||||||
{.test_name = "TestWithRotation",
|
{/* test_name= */ "TestWithRotation",
|
||||||
.base_offset = 0,
|
/* base_offset= */ 0,
|
||||||
.object_normalization_origin_offset = 0,
|
/* object_normalization_origin_offset= */ 0,
|
||||||
.expected_cell_0_2 = 0.1f,
|
/* expected_cell_0_2= */ 0.1f,
|
||||||
.expected_cell_1_5 = -0.25f,
|
/* expected_cell_1_5= */ -0.25f,
|
||||||
.rotation = M_PI / 2.0}}),
|
/* rotation= */ M_PI / 2.0}}),
|
||||||
[](const testing::TestParamInfo<
|
[](const testing::TestParamInfo<
|
||||||
LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
|
LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
|
||||||
return info.param.test_name;
|
return info.param.test_name;
|
||||||
|
|
|
@ -257,19 +257,28 @@ class HandDetectorGraph : public core::ModelTaskGraph {
|
||||||
preprocessed_tensors >> inference.In("TENSORS");
|
preprocessed_tensors >> inference.In("TENSORS");
|
||||||
auto model_output_tensors = inference.Out("TENSORS");
|
auto model_output_tensors = inference.Out("TENSORS");
|
||||||
|
|
||||||
|
// TODO: support hand detection metadata.
|
||||||
|
bool has_metadata = false;
|
||||||
|
|
||||||
// Generates a single side packet containing a vector of SSD anchors.
|
// Generates a single side packet containing a vector of SSD anchors.
|
||||||
auto& ssd_anchor = graph.AddNode("SsdAnchorsCalculator");
|
auto& ssd_anchor = graph.AddNode("SsdAnchorsCalculator");
|
||||||
ConfigureSsdAnchorsCalculator(
|
auto& ssd_anchor_options =
|
||||||
&ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>());
|
ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>();
|
||||||
|
if (!has_metadata) {
|
||||||
|
ConfigureSsdAnchorsCalculator(&ssd_anchor_options);
|
||||||
|
}
|
||||||
auto anchors = ssd_anchor.SideOut("");
|
auto anchors = ssd_anchor.SideOut("");
|
||||||
|
|
||||||
// Converts output tensors to Detections.
|
// Converts output tensors to Detections.
|
||||||
auto& tensors_to_detections =
|
auto& tensors_to_detections =
|
||||||
graph.AddNode("TensorsToDetectionsCalculator");
|
graph.AddNode("TensorsToDetectionsCalculator");
|
||||||
|
if (!has_metadata) {
|
||||||
ConfigureTensorsToDetectionsCalculator(
|
ConfigureTensorsToDetectionsCalculator(
|
||||||
subgraph_options,
|
subgraph_options,
|
||||||
&tensors_to_detections
|
&tensors_to_detections
|
||||||
.GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
|
.GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
|
||||||
|
}
|
||||||
|
|
||||||
model_output_tensors >> tensors_to_detections.In("TENSORS");
|
model_output_tensors >> tensors_to_detections.In("TENSORS");
|
||||||
anchors >> tensors_to_detections.SideIn("ANCHORS");
|
anchors >> tensors_to_detections.SideIn("ANCHORS");
|
||||||
auto detections = tensors_to_detections.Out("DETECTIONS");
|
auto detections = tensors_to_detections.Out("DETECTIONS");
|
||||||
|
|
|
@ -148,6 +148,7 @@ cc_library(
|
||||||
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
|
"//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
|
||||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
|
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
|
||||||
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
|
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
|
||||||
|
"//mediapipe/util:graph_builder_utils",
|
||||||
],
|
],
|
||||||
alwayslink = 1,
|
alwayslink = 1,
|
||||||
)
|
)
|
||||||
|
|
|
@ -14,6 +14,7 @@ limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -41,6 +42,7 @@ limitations under the License.
|
||||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
|
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
|
||||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
|
||||||
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
|
||||||
|
#include "mediapipe/util/graph_builder_utils.h"
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
namespace tasks {
|
namespace tasks {
|
||||||
|
@ -53,7 +55,7 @@ using ::mediapipe::NormalizedRect;
|
||||||
using ::mediapipe::api2::Input;
|
using ::mediapipe::api2::Input;
|
||||||
using ::mediapipe::api2::Output;
|
using ::mediapipe::api2::Output;
|
||||||
using ::mediapipe::api2::builder::Graph;
|
using ::mediapipe::api2::builder::Graph;
|
||||||
using ::mediapipe::api2::builder::Source;
|
using ::mediapipe::api2::builder::Stream;
|
||||||
using ::mediapipe::tasks::components::utils::DisallowIf;
|
using ::mediapipe::tasks::components::utils::DisallowIf;
|
||||||
using ::mediapipe::tasks::core::ModelAssetBundleResources;
|
using ::mediapipe::tasks::core::ModelAssetBundleResources;
|
||||||
using ::mediapipe::tasks::metadata::SetExternalFile;
|
using ::mediapipe::tasks::metadata::SetExternalFile;
|
||||||
|
@ -78,40 +80,46 @@ constexpr char kHandLandmarksDetectorTFLiteName[] =
|
||||||
"hand_landmarks_detector.tflite";
|
"hand_landmarks_detector.tflite";
|
||||||
|
|
||||||
struct HandLandmarkerOutputs {
|
struct HandLandmarkerOutputs {
|
||||||
Source<std::vector<NormalizedLandmarkList>> landmark_lists;
|
Stream<std::vector<NormalizedLandmarkList>> landmark_lists;
|
||||||
Source<std::vector<LandmarkList>> world_landmark_lists;
|
Stream<std::vector<LandmarkList>> world_landmark_lists;
|
||||||
Source<std::vector<NormalizedRect>> hand_rects_next_frame;
|
Stream<std::vector<NormalizedRect>> hand_rects_next_frame;
|
||||||
Source<std::vector<ClassificationList>> handednesses;
|
Stream<std::vector<ClassificationList>> handednesses;
|
||||||
Source<std::vector<NormalizedRect>> palm_rects;
|
Stream<std::vector<NormalizedRect>> palm_rects;
|
||||||
Source<std::vector<Detection>> palm_detections;
|
Stream<std::vector<Detection>> palm_detections;
|
||||||
Source<Image> image;
|
Stream<Image> image;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sets the base options in the sub tasks.
|
// Sets the base options in the sub tasks.
|
||||||
absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
|
absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
|
||||||
HandLandmarkerGraphOptions* options,
|
HandLandmarkerGraphOptions* options,
|
||||||
bool is_copy) {
|
bool is_copy) {
|
||||||
ASSIGN_OR_RETURN(const auto hand_detector_file,
|
|
||||||
resources.GetModelFile(kHandDetectorTFLiteName));
|
|
||||||
auto* hand_detector_graph_options =
|
auto* hand_detector_graph_options =
|
||||||
options->mutable_hand_detector_graph_options();
|
options->mutable_hand_detector_graph_options();
|
||||||
|
if (!hand_detector_graph_options->base_options().has_model_asset()) {
|
||||||
|
ASSIGN_OR_RETURN(const auto hand_detector_file,
|
||||||
|
resources.GetModelFile(kHandDetectorTFLiteName));
|
||||||
SetExternalFile(hand_detector_file,
|
SetExternalFile(hand_detector_file,
|
||||||
hand_detector_graph_options->mutable_base_options()
|
hand_detector_graph_options->mutable_base_options()
|
||||||
->mutable_model_asset(),
|
->mutable_model_asset(),
|
||||||
is_copy);
|
is_copy);
|
||||||
|
}
|
||||||
hand_detector_graph_options->mutable_base_options()
|
hand_detector_graph_options->mutable_base_options()
|
||||||
->mutable_acceleration()
|
->mutable_acceleration()
|
||||||
->CopyFrom(options->base_options().acceleration());
|
->CopyFrom(options->base_options().acceleration());
|
||||||
hand_detector_graph_options->mutable_base_options()->set_use_stream_mode(
|
hand_detector_graph_options->mutable_base_options()->set_use_stream_mode(
|
||||||
options->base_options().use_stream_mode());
|
options->base_options().use_stream_mode());
|
||||||
ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
|
|
||||||
resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
|
|
||||||
auto* hand_landmarks_detector_graph_options =
|
auto* hand_landmarks_detector_graph_options =
|
||||||
options->mutable_hand_landmarks_detector_graph_options();
|
options->mutable_hand_landmarks_detector_graph_options();
|
||||||
SetExternalFile(hand_landmarks_detector_file,
|
if (!hand_landmarks_detector_graph_options->base_options()
|
||||||
|
.has_model_asset()) {
|
||||||
|
ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
|
||||||
|
resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
|
||||||
|
SetExternalFile(
|
||||||
|
hand_landmarks_detector_file,
|
||||||
hand_landmarks_detector_graph_options->mutable_base_options()
|
hand_landmarks_detector_graph_options->mutable_base_options()
|
||||||
->mutable_model_asset(),
|
->mutable_model_asset(),
|
||||||
is_copy);
|
is_copy);
|
||||||
|
}
|
||||||
hand_landmarks_detector_graph_options->mutable_base_options()
|
hand_landmarks_detector_graph_options->mutable_base_options()
|
||||||
->mutable_acceleration()
|
->mutable_acceleration()
|
||||||
->CopyFrom(options->base_options().acceleration());
|
->CopyFrom(options->base_options().acceleration());
|
||||||
|
@ -119,7 +127,6 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
|
||||||
->set_use_stream_mode(options->base_options().use_stream_mode());
|
->set_use_stream_mode(options->base_options().use_stream_mode());
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
|
// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
|
||||||
|
@ -219,12 +226,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
||||||
!sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
|
!sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
|
||||||
.IsAvailable()));
|
.IsAvailable()));
|
||||||
}
|
}
|
||||||
|
Stream<Image> image_in = graph.In(kImageTag).Cast<Image>();
|
||||||
|
std::optional<Stream<NormalizedRect>> norm_rect_in;
|
||||||
|
if (HasInput(sc->OriginalNode(), kNormRectTag)) {
|
||||||
|
norm_rect_in = graph.In(kNormRectTag).Cast<NormalizedRect>();
|
||||||
|
}
|
||||||
ASSIGN_OR_RETURN(
|
ASSIGN_OR_RETURN(
|
||||||
auto hand_landmarker_outputs,
|
auto hand_landmarker_outputs,
|
||||||
BuildHandLandmarkerGraph(
|
BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
|
||||||
sc->Options<HandLandmarkerGraphOptions>(),
|
image_in, norm_rect_in, graph));
|
||||||
graph[Input<Image>(kImageTag)],
|
|
||||||
graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));
|
|
||||||
hand_landmarker_outputs.landmark_lists >>
|
hand_landmarker_outputs.landmark_lists >>
|
||||||
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
|
||||||
hand_landmarker_outputs.world_landmark_lists >>
|
hand_landmarker_outputs.world_landmark_lists >>
|
||||||
|
@ -262,8 +272,8 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
||||||
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
|
// image_in: (mediapipe::Image) stream to run hand landmark detection on.
|
||||||
// graph: the mediapipe graph instance to be updated.
|
// graph: the mediapipe graph instance to be updated.
|
||||||
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
|
absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
|
||||||
const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
|
const HandLandmarkerGraphOptions& tasks_options, Stream<Image> image_in,
|
||||||
Source<NormalizedRect> norm_rect_in, Graph& graph) {
|
std::optional<Stream<NormalizedRect>> norm_rect_in, Graph& graph) {
|
||||||
const int max_num_hands =
|
const int max_num_hands =
|
||||||
tasks_options.hand_detector_graph_options().num_hands();
|
tasks_options.hand_detector_graph_options().num_hands();
|
||||||
|
|
||||||
|
@ -293,10 +303,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
||||||
// track the hands from the last frame.
|
// track the hands from the last frame.
|
||||||
auto image_for_hand_detector =
|
auto image_for_hand_detector =
|
||||||
DisallowIf(image_in, has_enough_hands, graph);
|
DisallowIf(image_in, has_enough_hands, graph);
|
||||||
auto norm_rect_in_for_hand_detector =
|
std::optional<Stream<NormalizedRect>> norm_rect_in_for_hand_detector;
|
||||||
DisallowIf(norm_rect_in, has_enough_hands, graph);
|
if (norm_rect_in) {
|
||||||
|
norm_rect_in_for_hand_detector =
|
||||||
|
DisallowIf(norm_rect_in.value(), has_enough_hands, graph);
|
||||||
|
}
|
||||||
image_for_hand_detector >> hand_detector.In("IMAGE");
|
image_for_hand_detector >> hand_detector.In("IMAGE");
|
||||||
norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT");
|
if (norm_rect_in_for_hand_detector) {
|
||||||
|
norm_rect_in_for_hand_detector.value() >> hand_detector.In("NORM_RECT");
|
||||||
|
}
|
||||||
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
||||||
auto& hand_association = graph.AddNode("HandAssociationCalculator");
|
auto& hand_association = graph.AddNode("HandAssociationCalculator");
|
||||||
hand_association.GetOptions<HandAssociationCalculatorOptions>()
|
hand_association.GetOptions<HandAssociationCalculatorOptions>()
|
||||||
|
@ -313,7 +328,9 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
|
||||||
// series, and we don't want to enable the tracking and hand associations
|
// series, and we don't want to enable the tracking and hand associations
|
||||||
// between input images. Always use the hand detector graph.
|
// between input images. Always use the hand detector graph.
|
||||||
image_in >> hand_detector.In("IMAGE");
|
image_in >> hand_detector.In("IMAGE");
|
||||||
norm_rect_in >> hand_detector.In("NORM_RECT");
|
if (norm_rect_in) {
|
||||||
|
norm_rect_in.value() >> hand_detector.In("NORM_RECT");
|
||||||
|
}
|
||||||
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
|
||||||
hand_rects_from_hand_detector >> clip_hand_rects.In("");
|
hand_rects_from_hand_detector >> clip_hand_rects.In("");
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,16 +34,14 @@ objc_library(
|
||||||
data = [
|
data = [
|
||||||
"//mediapipe/tasks/testdata/vision:test_images",
|
"//mediapipe/tasks/testdata/vision:test_images",
|
||||||
],
|
],
|
||||||
sdk_frameworks = [
|
|
||||||
"CoreMedia",
|
|
||||||
"CoreVideo",
|
|
||||||
"CoreGraphics",
|
|
||||||
"UIKit",
|
|
||||||
"Accelerate",
|
|
||||||
],
|
|
||||||
deps = [
|
deps = [
|
||||||
"//mediapipe/tasks/ios/common:MPPCommon",
|
"//mediapipe/tasks/ios/common:MPPCommon",
|
||||||
"//mediapipe/tasks/ios/vision/core:MPPImage",
|
"//mediapipe/tasks/ios/vision/core:MPPImage",
|
||||||
|
"//third_party/apple_frameworks:Accelerate",
|
||||||
|
"//third_party/apple_frameworks:CoreGraphics",
|
||||||
|
"//third_party/apple_frameworks:CoreMedia",
|
||||||
|
"//third_party/apple_frameworks:CoreVideo",
|
||||||
|
"//third_party/apple_frameworks:UIKit",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -11,11 +11,6 @@ objc_library(
|
||||||
"-std=c++17",
|
"-std=c++17",
|
||||||
],
|
],
|
||||||
module_name = "MPPImage",
|
module_name = "MPPImage",
|
||||||
sdk_frameworks = [
|
|
||||||
"CoreMedia",
|
|
||||||
"CoreVideo",
|
|
||||||
"UIKit",
|
|
||||||
],
|
|
||||||
deps = [
|
deps = [
|
||||||
"//mediapipe/tasks/ios/common:MPPCommon",
|
"//mediapipe/tasks/ios/common:MPPCommon",
|
||||||
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
|
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#import <Foundation/Foundation.h>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/packet.h"
|
||||||
|
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class helps create various kinds of packets for MediaPipe Vision Tasks.
|
||||||
|
*/
|
||||||
|
@interface MPPVisionPacketCreator : NSObject
|
||||||
|
|
||||||
|
+ (mediapipe::Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error;
|
||||||
|
|
||||||
|
@end
|
|
@ -0,0 +1,43 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
|
||||||
|
#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h"
|
||||||
|
|
||||||
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
using ::mediapipe::Image;
|
||||||
|
using ::mediapipe::ImageFrame;
|
||||||
|
using ::mediapipe::MakePacket;
|
||||||
|
using ::mediapipe::Packet;
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
struct freeDeleter {
|
||||||
|
void operator()(void *ptr) { free(ptr); }
|
||||||
|
};
|
||||||
|
|
||||||
|
@implementation MPPVisionPacketCreator
|
||||||
|
|
||||||
|
+ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error {
|
||||||
|
std::unique_ptr<ImageFrame> imageFrame = [image imageFrameWithError:error];
|
||||||
|
|
||||||
|
if (!imageFrame) {
|
||||||
|
return Packet();
|
||||||
|
}
|
||||||
|
|
||||||
|
return MakePacket<Image>(std::move(imageFrame));
|
||||||
|
}
|
||||||
|
|
||||||
|
@end
|
|
@ -4,23 +4,22 @@ licenses(["notice"])
|
||||||
|
|
||||||
objc_library(
|
objc_library(
|
||||||
name = "MPPImageUtils",
|
name = "MPPImageUtils",
|
||||||
srcs = ["sources/MPPImage+Utils.m"],
|
srcs = ["sources/MPPImage+Utils.mm"],
|
||||||
hdrs = ["sources/MPPImage+Utils.h"],
|
hdrs = ["sources/MPPImage+Utils.h"],
|
||||||
copts = [
|
copts = [
|
||||||
"-ObjC++",
|
"-ObjC++",
|
||||||
"-std=c++17",
|
"-std=c++17",
|
||||||
],
|
],
|
||||||
module_name = "MPPImageUtils",
|
module_name = "MPPImageUtils",
|
||||||
sdk_frameworks = [
|
|
||||||
"Accelerate",
|
|
||||||
"CoreGraphics",
|
|
||||||
"CoreImage",
|
|
||||||
"CoreVideo",
|
|
||||||
"UIKit",
|
|
||||||
],
|
|
||||||
deps = [
|
deps = [
|
||||||
|
"//mediapipe/framework/formats:image_format_cc_proto",
|
||||||
|
"//mediapipe/framework/formats:image_frame",
|
||||||
|
"//mediapipe/tasks/ios/common:MPPCommon",
|
||||||
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
|
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
|
||||||
"//mediapipe/tasks/ios/vision/core:MPPImage",
|
"//mediapipe/tasks/ios/vision/core:MPPImage",
|
||||||
"//third_party/apple_frameworks:UIKit",
|
"//third_party/apple_frameworks:Accelerate",
|
||||||
|
"//third_party/apple_frameworks:CoreGraphics",
|
||||||
|
"//third_party/apple_frameworks:CoreImage",
|
||||||
|
"//third_party/apple_frameworks:CoreVideo",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
@ -14,30 +14,27 @@
|
||||||
|
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/formats/image_frame.h"
|
||||||
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
|
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
|
||||||
|
|
||||||
NS_ASSUME_NONNULL_BEGIN
|
NS_ASSUME_NONNULL_BEGIN
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library.
|
* Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`.
|
||||||
*/
|
*/
|
||||||
@interface MPPImage (Utils)
|
@interface MPPImage (Utils)
|
||||||
|
|
||||||
/** Bitmap size of the image. */
|
|
||||||
@property(nonatomic, readonly) CGSize bitmapSize;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the underlying uint8 pixel buffer of an `MPPImage`.
|
* Converts the `MPPImage` into a `mediapipe::ImageFrame`.
|
||||||
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel
|
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is
|
||||||
* data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated
|
* converted to an RGB format. In case of grayscale images, the mono channel is duplicated in the R,
|
||||||
* in the R, G, B channels.
|
* G, B channels.
|
||||||
*
|
*
|
||||||
* @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
|
* @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
|
||||||
* error will be saved.
|
* error will be saved.
|
||||||
*
|
*
|
||||||
* @return The underlying pixel buffer of the `MPPImage` or nil in case of errors.
|
* @return An std::unique_ptr<mediapipe::ImageFrame> or `nullptr` in case of errors.
|
||||||
*/
|
*/
|
||||||
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error;
|
- (std::unique_ptr<mediapipe::ImageFrame>)imageFrameWithError:(NSError **)error;
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,12 @@
|
||||||
#import <CoreImage/CoreImage.h>
|
#import <CoreImage/CoreImage.h>
|
||||||
#import <CoreVideo/CoreVideo.h>
|
#import <CoreVideo/CoreVideo.h>
|
||||||
|
|
||||||
|
#include "mediapipe/framework/formats/image_format.pb.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
using ::mediapipe::ImageFrame;
|
||||||
|
}
|
||||||
|
|
||||||
@interface MPPPixelDataUtils : NSObject
|
@interface MPPPixelDataUtils : NSObject
|
||||||
|
|
||||||
+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
|
+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
|
||||||
|
@ -35,21 +41,20 @@
|
||||||
|
|
||||||
@interface MPPCVPixelBufferUtils : NSObject
|
@interface MPPCVPixelBufferUtils : NSObject
|
||||||
|
|
||||||
+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error;
|
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||||
|
error:(NSError **)error;
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@interface MPPCGImageUtils : NSObject
|
@interface MPPCGImageUtils : NSObject
|
||||||
|
|
||||||
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
|
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@interface UIImage (RawPixelDataUtils)
|
@interface UIImage (ImageFrameUtils)
|
||||||
|
|
||||||
@property(nonatomic, readonly) CGSize bitmapSize;
|
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error;
|
||||||
|
|
||||||
- (uint8_t *)pixelDataWithError:(NSError **)error;
|
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
|
@ -120,9 +125,14 @@
|
||||||
|
|
||||||
@implementation MPPCVPixelBufferUtils
|
@implementation MPPCVPixelBufferUtils
|
||||||
|
|
||||||
+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error {
|
+ (std::unique_ptr<ImageFrame>)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||||
|
error:(NSError **)error {
|
||||||
CVPixelBufferLockBaseAddress(pixelBuffer, 0);
|
CVPixelBufferLockBaseAddress(pixelBuffer, 0);
|
||||||
|
|
||||||
|
size_t width = CVPixelBufferGetWidth(pixelBuffer);
|
||||||
|
size_t height = CVPixelBufferGetHeight(pixelBuffer);
|
||||||
|
size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer);
|
||||||
|
|
||||||
uint8_t *rgbPixelData = [MPPPixelDataUtils
|
uint8_t *rgbPixelData = [MPPPixelDataUtils
|
||||||
rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
|
rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
|
||||||
withWidth:CVPixelBufferGetWidth(pixelBuffer)
|
withWidth:CVPixelBufferGetWidth(pixelBuffer)
|
||||||
|
@ -133,19 +143,24 @@
|
||||||
|
|
||||||
CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
|
CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
|
||||||
|
|
||||||
return rgbPixelData;
|
if (!rgbPixelData) {
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
std::unique_ptr<ImageFrame> imageFrame = absl::make_unique<ImageFrame>(
|
||||||
error:(NSError **)error {
|
::mediapipe::ImageFormat::SRGB, width, height, stride, static_cast<uint8 *>(rgbPixelData),
|
||||||
uint8_t *pixelData = NULL;
|
/*deleter=*/free);
|
||||||
|
|
||||||
|
return imageFrame;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
|
||||||
|
error:(NSError **)error {
|
||||||
OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
|
OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
|
||||||
|
|
||||||
switch (pixelBufferFormat) {
|
switch (pixelBufferFormat) {
|
||||||
case kCVPixelFormatType_32BGRA: {
|
case kCVPixelFormatType_32BGRA: {
|
||||||
pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error];
|
return [MPPCVPixelBufferUtils rgbImageFrameFromCVPixelBuffer:pixelBuffer error:error];
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
[MPPCommonUtils createCustomError:error
|
[MPPCommonUtils createCustomError:error
|
||||||
|
@ -155,20 +170,20 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return pixelData;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@implementation MPPCGImageUtils
|
@implementation MPPCGImageUtils
|
||||||
|
|
||||||
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
|
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
|
||||||
size_t width = CGImageGetWidth(cgImage);
|
size_t width = CGImageGetWidth(cgImage);
|
||||||
size_t height = CGImageGetHeight(cgImage);
|
size_t height = CGImageGetHeight(cgImage);
|
||||||
|
|
||||||
NSInteger bitsPerComponent = 8;
|
NSInteger bitsPerComponent = 8;
|
||||||
NSInteger channelCount = 4;
|
NSInteger channelCount = 4;
|
||||||
UInt8 *pixel_data_to_return = NULL;
|
UInt8 *pixelDataToReturn = NULL;
|
||||||
|
|
||||||
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
|
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
|
||||||
size_t bytesPerRow = channelCount * width;
|
size_t bytesPerRow = channelCount * width;
|
||||||
|
@ -191,7 +206,7 @@
|
||||||
if (srcData) {
|
if (srcData) {
|
||||||
// We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input
|
// We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input
|
||||||
// a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
|
// a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
|
||||||
pixel_data_to_return = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
|
pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
|
||||||
withWidth:width
|
withWidth:width
|
||||||
height:height
|
height:height
|
||||||
stride:bytesPerRow
|
stride:bytesPerRow
|
||||||
|
@ -204,38 +219,38 @@
|
||||||
|
|
||||||
CGColorSpaceRelease(colorSpace);
|
CGColorSpaceRelease(colorSpace);
|
||||||
|
|
||||||
return pixel_data_to_return;
|
std::unique_ptr<ImageFrame> imageFrame =
|
||||||
|
absl::make_unique<ImageFrame>(mediapipe::ImageFormat::SRGB, (int)width, (int)height,
|
||||||
|
(int)bytesPerRow, static_cast<uint8 *>(pixelDataToReturn),
|
||||||
|
/*deleter=*/free);
|
||||||
|
|
||||||
|
return imageFrame;
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@implementation UIImage (RawPixelDataUtils)
|
@implementation UIImage (ImageFrameUtils)
|
||||||
|
|
||||||
- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error {
|
|
||||||
uint8_t *pixelData = NULL;
|
|
||||||
|
|
||||||
|
- (std::unique_ptr<ImageFrame>)imageFrameFromCIImageWithError:(NSError **)error {
|
||||||
if (self.CIImage.pixelBuffer) {
|
if (self.CIImage.pixelBuffer) {
|
||||||
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer
|
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error];
|
||||||
error:error];
|
|
||||||
|
|
||||||
} else if (self.CIImage.CGImage) {
|
} else if (self.CIImage.CGImage) {
|
||||||
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error];
|
return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error];
|
||||||
} else {
|
} else {
|
||||||
[MPPCommonUtils createCustomError:error
|
[MPPCommonUtils createCustomError:error
|
||||||
withCode:MPPTasksErrorCodeInvalidArgumentError
|
withCode:MPPTasksErrorCodeInvalidArgumentError
|
||||||
description:@"CIImage should have CGImage or CVPixelBuffer info."];
|
description:@"CIImage should have CGImage or CVPixelBuffer info."];
|
||||||
}
|
}
|
||||||
|
|
||||||
return pixelData;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
- (uint8_t *)pixelDataWithError:(NSError **)error {
|
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
|
||||||
uint8_t *pixelData = nil;
|
|
||||||
|
|
||||||
if (self.CGImage) {
|
if (self.CGImage) {
|
||||||
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error];
|
return [MPPCGImageUtils imageFrameFromCGImage:self.CGImage error:error];
|
||||||
} else if (self.CIImage) {
|
} else if (self.CIImage) {
|
||||||
pixelData = [self pixelDataFromCIImageWithError:error];
|
return [self imageFrameFromCIImageWithError:error];
|
||||||
} else {
|
} else {
|
||||||
[MPPCommonUtils createCustomError:error
|
[MPPCommonUtils createCustomError:error
|
||||||
withCode:MPPTasksErrorCodeInvalidArgumentError
|
withCode:MPPTasksErrorCodeInvalidArgumentError
|
||||||
|
@ -243,46 +258,24 @@
|
||||||
" CIImage or CGImage."];
|
" CIImage or CGImage."];
|
||||||
}
|
}
|
||||||
|
|
||||||
return pixelData;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
- (CGSize)bitmapSize {
|
|
||||||
CGFloat width = 0;
|
|
||||||
CGFloat height = 0;
|
|
||||||
|
|
||||||
if (self.CGImage) {
|
|
||||||
width = CGImageGetWidth(self.CGImage);
|
|
||||||
height = CGImageGetHeight(self.CGImage);
|
|
||||||
} else if (self.CIImage.pixelBuffer) {
|
|
||||||
width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer);
|
|
||||||
height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer);
|
|
||||||
} else if (self.CIImage.CGImage) {
|
|
||||||
width = CGImageGetWidth(self.CIImage.CGImage);
|
|
||||||
height = CGImageGetHeight(self.CIImage.CGImage);
|
|
||||||
}
|
|
||||||
return CGSizeMake(width, height);
|
|
||||||
}
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@implementation MPPImage (Utils)
|
@implementation MPPImage (Utils)
|
||||||
|
|
||||||
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error {
|
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
|
||||||
uint8_t *pixelData = NULL;
|
|
||||||
|
|
||||||
switch (self.imageSourceType) {
|
switch (self.imageSourceType) {
|
||||||
case MPPImageSourceTypeSampleBuffer: {
|
case MPPImageSourceTypeSampleBuffer: {
|
||||||
CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
|
CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
|
||||||
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:sampleImagePixelBuffer
|
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error];
|
||||||
error:error];
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case MPPImageSourceTypePixelBuffer: {
|
case MPPImageSourceTypePixelBuffer: {
|
||||||
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error];
|
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error];
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case MPPImageSourceTypeImage: {
|
case MPPImageSourceTypeImage: {
|
||||||
pixelData = [self.image pixelDataWithError:error];
|
return [self.image imageFrameWithError:error];
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
[MPPCommonUtils createCustomError:error
|
[MPPCommonUtils createCustomError:error
|
||||||
|
@ -290,35 +283,7 @@
|
||||||
description:@"Invalid source type for MPPImage."];
|
description:@"Invalid source type for MPPImage."];
|
||||||
}
|
}
|
||||||
|
|
||||||
return pixelData;
|
return nullptr;
|
||||||
}
|
|
||||||
|
|
||||||
- (CGSize)bitmapSize {
|
|
||||||
CGFloat width = 0;
|
|
||||||
CGFloat height = 0;
|
|
||||||
|
|
||||||
switch (self.imageSourceType) {
|
|
||||||
case MPPImageSourceTypeSampleBuffer: {
|
|
||||||
CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
|
|
||||||
width = CVPixelBufferGetWidth(pixelBuffer);
|
|
||||||
height = CVPixelBufferGetHeight(pixelBuffer);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case MPPImageSourceTypePixelBuffer: {
|
|
||||||
width = CVPixelBufferGetWidth(self.pixelBuffer);
|
|
||||||
height = CVPixelBufferGetHeight(self.pixelBuffer);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case MPPImageSourceTypeImage: {
|
|
||||||
width = self.image.bitmapSize.width;
|
|
||||||
height = self.image.bitmapSize.height;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return CGSizeMake(width, height);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
38
mediapipe/tasks/ios/vision/image_classifier/BUILD
Normal file
38
mediapipe/tasks/ios/vision/image_classifier/BUILD
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
objc_library(
|
||||||
|
name = "MPPImageClassifierResult",
|
||||||
|
srcs = ["sources/MPPImageClassifierResult.m"],
|
||||||
|
hdrs = ["sources/MPPImageClassifierResult.h"],
|
||||||
|
deps = [
|
||||||
|
"//mediapipe/tasks/ios/components/containers:MPPClassificationResult",
|
||||||
|
"//mediapipe/tasks/ios/core:MPPTaskResult",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
objc_library(
|
||||||
|
name = "MPPImageClassifierOptions",
|
||||||
|
srcs = ["sources/MPPImageClassifierOptions.m"],
|
||||||
|
hdrs = ["sources/MPPImageClassifierOptions.h"],
|
||||||
|
deps = [
|
||||||
|
":MPPImageClassifierResult",
|
||||||
|
"//mediapipe/tasks/ios/core:MPPTaskOptions",
|
||||||
|
"//mediapipe/tasks/ios/vision/core:MPPRunningMode",
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1,71 @@
|
||||||
|
// Copyright 2023 The MediaPipe Authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#import <Foundation/Foundation.h>
|
||||||
|
|
||||||
|
#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h"
|
||||||
|
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
|
||||||
|
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h"
|
||||||
|
|
||||||
|
NS_ASSUME_NONNULL_BEGIN
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Options for setting up a `MPPImageClassifier`.
|
||||||
|
*/
|
||||||
|
NS_SWIFT_NAME(ImageClassifierOptions)
|
||||||
|
@interface MPPImageClassifierOptions : MPPTaskOptions <NSCopying>
|
||||||
|
|
||||||
|
@property(nonatomic) MPPRunningMode runningMode;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The user-defined result callback for processing live stream data. The result callback should only
|
||||||
|
* be specified when the running mode is set to the live stream mode.
|
||||||
|
*/
|
||||||
|
@property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults
|
||||||
|
* to English.
|
||||||
|
*/
|
||||||
|
@property(nonatomic, copy) NSString *displayNamesLocale;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum number of top-scored classification results to return. If < 0, all available results
|
||||||
|
* will be returned. If 0, an invalid argument error is returned.
|
||||||
|
*/
|
||||||
|
@property(nonatomic) NSInteger maxResults;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Score threshold to override the one provided in the model metadata (if any). Results below this
|
||||||
|
* value are rejected.
|
||||||
|
*/
|
||||||
|
@property(nonatomic) float scoreThreshold;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The allowlist of category names. If non-empty, classification results whose category name is not in
|
||||||
|
* this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
|
||||||
|
* exclusive with categoryDenylist.
|
||||||
|
*/
|
||||||
|
@property(nonatomic, copy) NSArray<NSString *> *categoryAllowlist;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The denylist of category names. If non-empty, classification results whose category name is in this
|
||||||
|
* set will be filtered out. Duplicate or unknown category names are ignored. Mutually exclusive
|
||||||
|
* with categoryAllowlist.
|
||||||
|
*/
|
||||||
|
@property(nonatomic, copy) NSArray<NSString *> *categoryDenylist;
|
||||||
|
|
||||||
|
@end
|
||||||
|
|
||||||
|
NS_ASSUME_NONNULL_END
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user