Merge branch 'google:master' into master
Commit 6e7018b826

README.md (11 changes)

@@ -19,6 +19,17 @@ ML solutions for live and streaming media.
                                                             |
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*

----

**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023.*

*This notice and web page will be removed on April 3, 2023.*

----

## ML solutions in MediaPipe

Face Detection | Face Mesh | Iris | Hands | Pose | Holistic

docs/_layouts/forward.html (13 lines, new file)

@@ -0,0 +1,13 @@
<html lang="en">
<head>
    <meta charset="utf-8"/>
    <meta http-equiv="refresh" content="0;url={{ page.target }}"/>
    <link rel="canonical" href="{{ page.target }}"/>
    <title>Redirecting</title>
</head>
<body>
    <p>This page now lives on https://developers.google.com/mediapipe/. If you aren't automatically
      redirected, follow this
    <a href="{{ page.target }}">link</a>.</p>
</body>
</html>

@@ -593,3 +593,105 @@ CalculatorGraphConfig BuildGraph() {
  return graph.GetConfig();
}
```

### Separate nodes for better readability

```c++ {.bad}
CalculatorGraphConfig BuildGraph() {
  Graph graph;

  // Inputs.
  Stream<A> a = graph.In(0).Cast<A>();
  auto& node1 = graph.AddNode("Calculator1");
  a.ConnectTo(node1.In("INPUT"));
  Stream<B> b = node1.Out("OUTPUT").Cast<B>();
  auto& node2 = graph.AddNode("Calculator2");
  b.ConnectTo(node2.In("INPUT"));
  Stream<C> c = node2.Out("OUTPUT").Cast<C>();
  auto& node3 = graph.AddNode("Calculator3");
  b.ConnectTo(node3.In("INPUT_B"));
  c.ConnectTo(node3.In("INPUT_C"));
  Stream<D> d = node3.Out("OUTPUT").Cast<D>();
  auto& node4 = graph.AddNode("Calculator4");
  b.ConnectTo(node4.In("INPUT_B"));
  c.ConnectTo(node4.In("INPUT_C"));
  d.ConnectTo(node4.In("INPUT_D"));
  Stream<E> e = node4.Out("OUTPUT").Cast<E>();
  // Outputs.
  b.SetName("b").ConnectTo(graph.Out(0));
  c.SetName("c").ConnectTo(graph.Out(1));
  d.SetName("d").ConnectTo(graph.Out(2));
  e.SetName("e").ConnectTo(graph.Out(3));

  return graph.GetConfig();
}
```

In the above code, it can be hard to tell where each node begins and
ends. To improve this and help your code readers, you can simply add blank
lines before and after each node:

```c++ {.good}
CalculatorGraphConfig BuildGraph() {
  Graph graph;

  // Inputs.
  Stream<A> a = graph.In(0).Cast<A>();

  auto& node1 = graph.AddNode("Calculator1");
  a.ConnectTo(node1.In("INPUT"));
  Stream<B> b = node1.Out("OUTPUT").Cast<B>();

  auto& node2 = graph.AddNode("Calculator2");
  b.ConnectTo(node2.In("INPUT"));
  Stream<C> c = node2.Out("OUTPUT").Cast<C>();

  auto& node3 = graph.AddNode("Calculator3");
  b.ConnectTo(node3.In("INPUT_B"));
  c.ConnectTo(node3.In("INPUT_C"));
  Stream<D> d = node3.Out("OUTPUT").Cast<D>();

  auto& node4 = graph.AddNode("Calculator4");
  b.ConnectTo(node4.In("INPUT_B"));
  c.ConnectTo(node4.In("INPUT_C"));
  d.ConnectTo(node4.In("INPUT_D"));
  Stream<E> e = node4.Out("OUTPUT").Cast<E>();

  // Outputs.
  b.SetName("b").ConnectTo(graph.Out(0));
  c.SetName("c").ConnectTo(graph.Out(1));
  d.SetName("d").ConnectTo(graph.Out(2));
  e.SetName("e").ConnectTo(graph.Out(3));

  return graph.GetConfig();
}
```

Also, the above representation better matches the `CalculatorGraphConfig` proto
representation.

If you extract nodes into utility functions, they are scoped within functions
already and it's clear where they begin and end, so it's completely fine to
have:

```c++ {.good}
CalculatorGraphConfig BuildGraph() {
  Graph graph;

  // Inputs.
  Stream<A> a = graph.In(0).Cast<A>();

  Stream<B> b = RunCalculator1(a, graph);
  Stream<C> c = RunCalculator2(b, graph);
  Stream<D> d = RunCalculator3(b, c, graph);
  Stream<E> e = RunCalculator4(b, c, d, graph);

  // Outputs.
  b.SetName("b").ConnectTo(graph.Out(0));
  c.SetName("c").ConnectTo(graph.Out(1));
  d.SetName("d").ConnectTo(graph.Out(2));
  e.SetName("e").ConnectTo(graph.Out(3));

  return graph.GetConfig();
}
```
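
Each `RunCalculatorN` helper above is just the node-building code moved into its own function. A minimal sketch of what one such helper could look like, using the same placeholder calculator name and stream tags as the examples above (not a real calculator):

```c++
// Sketch only: wraps construction of "Calculator1" so BuildGraph() stays flat.
Stream<B> RunCalculator1(Stream<A> a, Graph& graph) {
  auto& node = graph.AddNode("Calculator1");
  a.ConnectTo(node.In("INPUT"));
  return node.Out("OUTPUT").Cast<B>();
}
```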

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/calculators
title: Calculators
parent: Framework Concepts
nav_order: 1

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/overview
title: Framework Concepts
nav_order: 5
has_children: true

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/gpu
title: GPU
parent: Framework Concepts
nav_order: 5

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/graphs
title: Graphs
parent: Framework Concepts
nav_order: 2

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/packets
title: Packets
parent: Framework Concepts
nav_order: 3

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/realtime_streams
title: Real-time Streams
parent: Framework Concepts
nav_order: 6

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/framework_concepts/synchronization
title: Synchronization
parent: Framework Concepts
nav_order: 4

@@ -13,6 +13,17 @@ nav_order: 2
{:toc}
---

**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*

*This notice and web page will be removed on April 3, 2023.*

----

MediaPipe Android Solution APIs (currently in alpha) are available in:

*   [MediaPipe Face Detection](../solutions/face_detection#android-solution-api)

@@ -12,6 +12,17 @@ nav_exclude: true
{:toc}
---

**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*

*This notice and web page will be removed on April 3, 2023.*

----

### Android

Please see these [instructions](./android.md).

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/faq
title: FAQ
parent: Getting Started
nav_order: 9

@@ -59,7 +60,7 @@ The second approach allows up to [`max_in_flight`] invocations of the
packets from [`CalculatorBase::Process`] are automatically ordered by timestamp
before they are passed along to downstream calculators.

With either aproach, you must be aware that the calculator running in parallel
With either approach, you must be aware that the calculator running in parallel
cannot maintain internal state in the same way as a normal sequential
calculator.
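
As an illustration of that constraint (this is an editorial sketch, not part of the original FAQ), a calculator that is safe to run with multiple invocations in flight derives each output purely from the packets of the current `Process()` call; the calculator below is hypothetical:

```c++
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Hypothetical, stateless calculator: every Process() invocation depends only
// on its own input packet, so parallel invocations cannot interfere.
class SquareIntCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).Set<int>();
    cc->Outputs().Index(0).Set<int>();
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    const int value = cc->Inputs().Index(0).Value().Get<int>();
    cc->Outputs().Index(0).AddPacket(
        MakePacket<int>(value * value).At(cc->InputTimestamp()));
    return absl::OkStatus();
  }
};
REGISTER_CALCULATOR(SquareIntCalculator);

}  // namespace mediapipe
```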

@@ -11,3 +11,14 @@ has_children: true
1. TOC
{:toc}
---

**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*

*This notice and web page will be removed on April 3, 2023.*

----

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/gpu_support
title: GPU Support
parent: Getting Started
nav_order: 7

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/help
title: Getting Help
parent: Getting Started
nav_order: 8

@@ -37,8 +38,8 @@ If you open a GitHub issue, here is our policy:
- **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**:
- **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**:
- **Bazel version**:
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev enviroment)**:
- **Xcode & Tulsi version (if issue is related to building in mobile dev enviroment)**:
- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev environment)**:
- **Xcode & Tulsi version (if issue is related to building in mobile dev environment)**:
- **Exact steps to reproduce**:

### Describe the problem

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/install
title: Installation
parent: Getting Started
nav_order: 6

@@ -12,6 +12,17 @@ nav_order: 4
{:toc}
---

**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023. This content will not be moved to
the new site, but will remain available in the source code repository on an
as-is basis.*

*This notice and web page will be removed on April 3, 2023.*

----

## Ready-to-use JavaScript Solutions

MediaPipe currently offers the following solutions:

@@ -33,7 +44,7 @@ snippets.

| Browser | Platform                | Notes                                  |
| ------- | ----------------------- | -------------------------------------- |
| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuschia |
| Chrome  | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia |
|         |                         | unsupported.                           |
| Chrome  | iOS                     | Camera unavailable in Chrome on iOS.   |
| Safari  | iPad/iPhone/Mac         | iOS and Safari on iPad / iPhone /      |

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/getting_started/troubleshooting
title: Troubleshooting
parent: Getting Started
nav_order: 10

@@ -65,7 +66,7 @@ WARNING: Download from https://storage.googleapis.com/mirror.tensorflow.org/gith
```

usually indicates that Bazel fails to download necessary dependency repositories
that MediaPipe needs. MedaiPipe has several dependency repositories that are
that MediaPipe needs. MediaPipe has several dependency repositories that are
hosted by Google sites. In some regions, you may need to set up a network proxy
or use a VPN to access those resources. You may also need to append
`--host_jvm_args "-DsocksProxyHost=<ip address> -DsocksProxyPort=<port number>"`

@@ -19,6 +19,17 @@ ML solutions for live and streaming media.
                                                             |
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*

----

**Attention:** *Thanks for your interest in MediaPipe! We are moving to
[https://developers.google.com/mediapipe](https://developers.google.com/mediapipe)
as the primary developer documentation
site for MediaPipe starting April 3, 2023.*

*This notice and web page will be removed on April 3, 2023.*

----

## ML solutions in MediaPipe

Face Detection | Face Mesh | Iris | Hands | Pose | Holistic

@@ -18,6 +18,16 @@ nav_order: 14
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

AutoFlip is an automatic video cropping pipeline built on top of MediaPipe. This

@@ -18,6 +18,16 @@ nav_order: 10
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

MediaPipe Box Tracking has been powering real-time tracking in

@@ -18,6 +18,16 @@ nav_order: 1
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

MediaPipe Face Detection is an ultrafast face detection solution that comes with

@@ -18,6 +18,16 @@ nav_order: 2
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in

@@ -133,7 +143,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
detection in the screen coordinate space: the X- and Y- coordinates are
normalized screen coordinates, while the Z coordinate is relative and is scaled
as the X coodinate under the
as the X coordinate under the
[weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection).
This format is well-suited for some applications, however it does not directly
enable the full spectrum of augmented reality (AR) features like aligning a

@@ -18,6 +18,16 @@ nav_order: 8
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Example Apps

@@ -18,6 +18,16 @@ nav_order: 4
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

The ability to perceive the shape and motion of hands can be a vital component

@@ -18,6 +18,16 @@ nav_order: 6
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

Live perception of simultaneous [human pose](./pose.md),

@@ -18,6 +18,16 @@ nav_order: 11
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

Augmented Reality (AR) technology creates fun, engaging, and immersive user

@@ -18,6 +18,16 @@ nav_order: 3
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

A wide range of real-world applications, including computational photography

@@ -38,7 +48,7 @@ camera, in real-time, without the need for specialized hardware. Through use of
iris landmarks, the solution is also able to determine the metric distance
between the subject and the camera with relative error less than 10%. Note that
iris tracking does not infer the location at which people are looking, nor does
it provide any form of identity recognition. With the cross-platfrom capability
it provide any form of identity recognition. With the cross-platform capability
of the MediaPipe framework, MediaPipe Iris can run on most modern
[mobile phones](#mobile), [desktops/laptops](#desktop) and even on the
[web](#web).

@@ -99,7 +109,7 @@ You can also find more details in this
### Iris Landmark Model

The iris model takes an image patch of the eye region and estimates both the eye
landmarks (along the eyelid) and iris landmarks (along ths iris contour). You
landmarks (along the eyelid) and iris landmarks (along this iris contour). You
can find more details in this [paper](https://arxiv.org/abs/2006.11341).

 |

@@ -18,6 +18,16 @@ nav_order: 13
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

MediaPipe KNIFT is a template-based feature matching solution using KNIFT

@@ -18,6 +18,16 @@ nav_order: 15
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

MediaPipe is a useful and general framework for media processing that can

@@ -85,7 +95,7 @@ process new data sets, in the documentation of

    MediaSequence uses SequenceExamples as the format of both inputs and
    outputs. Annotations are encoded as inputs in a SequenceExample of metadata
    that defines the labels and the path to the cooresponding video file. This
    that defines the labels and the path to the corresponding video file. This
    metadata is passed as input to the C++ `media_sequence_demo` binary, and the
    output is a SequenceExample filled with images and annotations ready for
    model training.

@@ -12,6 +12,20 @@ nav_order: 30
{:toc}
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
as of March 1, 2023. All other
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
to a new MediaPipe Solution. The code repository and prebuilt binaries for all
MediaPipe Legacy Solutions will continue to be provided on an as-is basis.
We encourage you to check out the new MediaPipe Solutions at:
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*

*This notice and web page will be removed on April 3, 2023.*

----

### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)

*   Short-range model (best for faces within 2 meters from the camera):

@@ -18,6 +18,16 @@ nav_order: 9
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Example Apps

@@ -18,6 +18,16 @@ nav_order: 12
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

MediaPipe Objectron is a mobile real-time 3D object detection solution for

@@ -170,7 +180,7 @@ and a
The detection subgraph performs ML inference only once every few frames to
reduce computation load, and decodes the output tensor to a FrameAnnotation that
contains nine keypoints: the 3D bounding box's center and its eight vertices.
The tracking subgraph runs every frame, using the box traker in
The tracking subgraph runs every frame, using the box tracker in
[MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly
enclosing the projection of the 3D bounding box, and lifts the tracked 2D
keypoints to 3D with

@@ -613,7 +623,7 @@ z_ndc = 1 / Z

### Pixel Space

In this API we set upper-left coner of an image as the origin of pixel
In this API we set upper-left corner of an image as the origin of pixel
coordinate. One can convert from NDC to pixel space as follows:

```

@@ -20,6 +20,16 @@ nav_order: 5
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

Human pose estimation from video plays a critical role in various applications

@@ -19,6 +19,16 @@ nav_order: 1
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

One of the applications

@@ -18,6 +18,16 @@ nav_order: 7
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
As of March 1, 2023, this solution is planned to be upgraded to a new MediaPipe
Solution. For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

## Overview

*Fig 1. Example of MediaPipe Selfie Segmentation.* |

@@ -13,7 +13,21 @@ has_toc: false
{:toc}
---

Note: These solutions are no longer actively maintained. Consider using or migrating to the new [MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide).
**Attention:** *Thank you for your interest in MediaPipe Solutions. We have
ended support for
[these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
as of March 1, 2023. All other
[MediaPipe Legacy Solutions will be upgraded](https://developers.google.com/mediapipe/solutions/guide#legacy)
to a new MediaPipe Solution. The
[code repository](https://github.com/google/mediapipe/tree/master/mediapipe)
and prebuilt binaries for all MediaPipe Legacy Solutions will continue to
be provided on an as-is basis. We encourage you to check out the new MediaPipe
Solutions at:
[https://developers.google.com/mediapipe/solutions](https://developers.google.com/mediapipe/solutions)*

*This notice and web page will be removed on June 1, 2023.*

----

MediaPipe offers open source cross-platform, customizable ML solutions for live
and streaming media.

@@ -18,6 +18,16 @@ nav_order: 16
</details>
---

**Attention:** *Thank you for your interest in MediaPipe Solutions.
We have ended support for this MediaPipe Legacy Solution as of March 1, 2023.
For more information, see the new
[MediaPipe Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
site.*

*This notice and web page will be removed on April 3, 2023.*

----

MediaPipe is a useful and general framework for media processing that can assist
with research, development, and deployment of ML models. This example focuses on
model development by demonstrating how to prepare training data and do model

@@ -1,5 +1,6 @@
---
layout: default
layout: forward
target: https://developers.google.com/mediapipe/framework/tools/visualizer
title: Visualizer
parent: Tools
nav_order: 1

@@ -48,7 +48,6 @@ class MergeToVectorCalculator : public Node {
  }

  absl::Status Process(CalculatorContext* cc) {
    const int input_num = kIn(cc).Count();
    std::vector<T> output_vector;
    for (auto it = kIn(cc).begin(); it != kIn(cc).end(); it++) {
      const auto& elem = *it;

@@ -13,6 +13,7 @@
# limitations under the License.
#

load("@bazel_skylib//lib:selects.bzl", "selects")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type")

@@ -23,6 +24,14 @@ package(

licenses(["notice"])

selects.config_setting_group(
    name = "ios_or_disable_gpu",
    match_any = [
        "//mediapipe/gpu:disable_gpu",
        "//mediapipe:ios",
    ],
)

mediapipe_proto_library(
    name = "detection_proto",
    srcs = ["detection.proto"],

@@ -336,9 +345,7 @@ cc_library(
        "//conditions:default": [
            "//mediapipe/gpu:gl_texture_buffer",
        ],
        "//mediapipe:ios": [
        ],
        "//mediapipe/gpu:disable_gpu": [],
        "ios_or_disable_gpu": [],
    }) + select({
        "//conditions:default": [],
        "//mediapipe:apple": [

@@ -18,15 +18,16 @@

#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"

namespace mediapipe {
namespace tool {

absl::Status StatusInvalid(const std::string& message) {
absl::Status StatusInvalid(absl::string_view message) {
  return absl::Status(absl::StatusCode::kInvalidArgument, message);
}

absl::Status StatusFail(const std::string& message) {
absl::Status StatusFail(absl::string_view message) {
  return absl::Status(absl::StatusCode::kUnknown, message);
}

@@ -35,12 +36,12 @@ absl::Status StatusStop() {
                      "mediapipe::tool::StatusStop()");
}

absl::Status AddStatusPrefix(const std::string& prefix,
absl::Status AddStatusPrefix(absl::string_view prefix,
                             const absl::Status& status) {
  return absl::Status(status.code(), absl::StrCat(prefix, status.message()));
}

absl::Status CombinedStatus(const std::string& general_comment,
absl::Status CombinedStatus(absl::string_view general_comment,
                            const std::vector<absl::Status>& statuses) {
  // The final error code is absl::StatusCode::kUnknown if not all
  // the error codes are the same.  Otherwise it is the same error code

@@ -19,6 +19,7 @@
#include <vector>

#include "absl/base/macros.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/port/status.h"

namespace mediapipe {

@@ -34,16 +35,16 @@ absl::Status StatusStop();
// Return a status which signals an invalid initial condition (for
// example an InputSidePacket does not include all necessary fields).
ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.")
absl::Status StatusInvalid(const std::string& error_message);
absl::Status StatusInvalid(absl::string_view error_message);

// Return a status which signals that something unexpectedly failed.
ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.")
absl::Status StatusFail(const std::string& error_message);
absl::Status StatusFail(absl::string_view error_message);

// Prefixes the given string to the error message in status.
// This function should be considered internal to the framework.
// TODO Replace usage of AddStatusPrefix with util::Annotate().
absl::Status AddStatusPrefix(const std::string& prefix,
absl::Status AddStatusPrefix(absl::string_view prefix,
                             const absl::Status& status);

// Combine a vector of absl::Status into a single composite status.

@@ -51,7 +52,7 @@ absl::Status AddStatusPrefix(const std::string& prefix,
// will be returned.
// This function should be considered internal to the framework.
// TODO Move this function to somewhere with less visibility.
absl::Status CombinedStatus(const std::string& general_comment,
absl::Status CombinedStatus(absl::string_view general_comment,
                            const std::vector<absl::Status>& statuses);

}  // namespace tool
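
A hypothetical call site showing what the `absl::string_view` signatures in this diff buy: string literals and substrings can be passed without first materializing a `std::string`. The helper function, error messages, and include path below are illustrative assumptions, not part of the commit:

```c++
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/tool/status_util.h"  // assumed include path

// Illustrative helper: prefixes a stream name onto an existing error status.
absl::Status AnnotateStreamError(absl::string_view stream_name,
                                 const absl::Status& status) {
  // AddStatusPrefix now accepts string literals and views directly.
  return mediapipe::tool::AddStatusPrefix(
      absl::StrCat("stream '", stream_name, "': "), status);
}
```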

@@ -15,7 +15,9 @@
package com.google.mediapipe.components;

import static java.lang.Math.max;
import static java.lang.Math.min;

import android.graphics.Bitmap;
import android.graphics.SurfaceTexture;
import android.opengl.GLES11Ext;
import android.opengl.GLES20;

@@ -25,9 +27,12 @@ import android.util.Log;
import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.glutil.CommonShaders;
import com.google.mediapipe.glutil.ShaderUtil;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.opengles.GL10;

@@ -44,6 +49,13 @@ import javax.microedition.khronos.opengles.GL10;
 * {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}).
 */
public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
  /**
   * Listener for Bitmap capture requests.
   */
  public interface BitmapCaptureListener {
    void onBitmapCaptured(Bitmap result);
  }

  private static final String TAG = "DemoRenderer";
  private static final int ATTRIB_POSITION = 1;
  private static final int ATTRIB_TEXTURE_COORDINATE = 2;

@@ -56,12 +68,32 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
  private int frameUniform;
  private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES;
  private int textureTransformUniform;
  private boolean shouldFitToWidth = false;
  // Controls the alignment between frame size and surface size, 0.5f default is centered.
  private float alignmentHorizontal = 0.5f;
  private float alignmentVertical = 0.5f;
  private float[] textureTransformMatrix = new float[16];
  private SurfaceTexture surfaceTexture = null;
  private final AtomicReference<TextureFrame> nextFrame = new AtomicReference<>();
  private final AtomicBoolean captureNextFrameBitmap = new AtomicBoolean();
  private BitmapCaptureListener bitmapCaptureListener;

  /**
   * Sets the {@link BitmapCaptureListener}.
   */
  public void setBitmapCaptureListener(BitmapCaptureListener bitmapCaptureListener) {
    this.bitmapCaptureListener = bitmapCaptureListener;
  }

  /**
   * Request to capture Bitmap of the next frame.
   *
   * The result will be provided to the {@link BitmapCaptureListener} if one is set. Please note
   * this is an expensive operation and the result may not be available for a while.
   */
  public void captureNextFrameBitmap() {
    captureNextFrameBitmap.set(true);
  }

  @Override
  public void onSurfaceCreated(GL10 gl, EGLConfig config) {

@@ -147,6 +179,31 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {

    GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
    ShaderUtil.checkGlError("glDrawArrays");

    // Capture Bitmap if requested.
    BitmapCaptureListener bitmapCaptureListener = this.bitmapCaptureListener;
    if (captureNextFrameBitmap.getAndSet(false) && bitmapCaptureListener != null) {
      int bitmapSize = surfaceWidth * surfaceHeight;
      ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bitmapSize * 4);
      byteBuffer.order(ByteOrder.nativeOrder());
      GLES20.glReadPixels(
          0, 0, surfaceWidth, surfaceHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, byteBuffer);
      int[] pixelBuffer = new int[bitmapSize];
      byteBuffer.asIntBuffer().get(pixelBuffer);
      for (int i = 0; i < bitmapSize; i++) {
        // Swap R and B channels.
        pixelBuffer[i] =
            (pixelBuffer[i] & 0xff00ff00)
                | ((pixelBuffer[i] & 0x000000ff) << 16)
                | ((pixelBuffer[i] & 0x00ff0000) >> 16);
      }
      Bitmap bitmap = Bitmap.createBitmap(surfaceWidth, surfaceHeight, Bitmap.Config.ARGB_8888);
      bitmap.setPixels(
          pixelBuffer, /* offset= */bitmapSize - surfaceWidth, /* stride= */-surfaceWidth,
          /* x= */0, /* y= */0, surfaceWidth, surfaceHeight);
      bitmapCaptureListener.onBitmapCaptured(bitmap);
    }

    GLES20.glBindTexture(textureTarget, 0);
    ShaderUtil.checkGlError("unbind surfaceTexture");

@@ -158,13 +215,17 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
    // TODO: compute scale from surfaceTexture size.
    float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f;
    float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f;
    // Whichever of the two scales is greater corresponds to the dimension where the image
    // is proportionally smaller than the view. Dividing both scales by that number results
    // By default whichever of the two scales is greater corresponds to the dimension where the
    // image is proportionally smaller than the view. Dividing both scales by that number results
    // in that dimension having scale 1.0, and thus touching the edges of the view, while the
    // other is cropped proportionally.
    float maxScale = max(scaleWidth, scaleHeight);
    scaleWidth /= maxScale;
    scaleHeight /= maxScale;
    // other is cropped proportionally. If shouldFitToWidth is set as true, use the min scale
    // if frame width is greater than frame height.
    float scale = max(scaleWidth, scaleHeight);
    if (shouldFitToWidth && (frameWidth > frameHeight)) {
      scale = min(scaleWidth, scaleHeight);
    }
    scaleWidth /= scale;
    scaleHeight /= scale;

    // Alignment controls where the visible section is placed within the full camera frame, with
    // (0, 0) being the bottom left, and (1, 1) being the top right.

@@ -232,6 +293,11 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
    frameHeight = height;
  }

  /** Supports fit to width when the frame width is greater than the frame height. */
  public void setShouldFitToWidth(boolean shouldFitToWidth) {
    this.shouldFitToWidth = shouldFitToWidth;
  }

  /**
   * When the aspect ratios between the camera frame and the surface size are mismatched, this
   * controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning

@@ -35,7 +35,6 @@ cc_library(
        "//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto",
        "//mediapipe/tasks/cc/components/processors:embedder_options",
        "//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto",
        "//mediapipe/tasks/cc/components/utils:cosine_similarity",
        "//mediapipe/tasks/cc/core:base_options",
        "//mediapipe/tasks/cc/core:task_runner",
        "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto",

@@ -29,7 +29,6 @@ limitations under the License.
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
#include "mediapipe/tasks/cc/components/processors/embedder_options.h"
#include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h"
#include "mediapipe/tasks/cc/components/utils/cosine_similarity.h"
#include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h"
#include "mediapipe/tasks/cc/core/task_runner.h"
#include "tensorflow/lite/core/api/op_resolver.h"
			@ -147,10 +146,4 @@ absl::Status AudioEmbedder::EmbedAsync(Matrix audio_block,
 | 
			
		|||
            .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::StatusOr<double> AudioEmbedder::CosineSimilarity(
 | 
			
		||||
    const components::containers::Embedding& u,
 | 
			
		||||
    const components::containers::Embedding& v) {
 | 
			
		||||
  return components::utils::CosineSimilarity(u, v);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::audio::audio_embedder
 | 
			
		||||
@@ -125,16 +125,6 @@ class AudioEmbedder : core::BaseAudioTaskApi {

  // Shuts down the AudioEmbedder when all work is done.
  absl::Status Close() { return runner_->Close(); }

  // Utility function to compute cosine similarity [1] between two embeddings.
  // May return an InvalidArgumentError if e.g. the embeddings are of different
  // types (quantized vs. float), have different sizes, or have an L2-norm of
  // 0.
  //
  // [1]: https://en.wikipedia.org/wiki/Cosine_similarity
  static absl::StatusOr<double> CosineSimilarity(
      const components::containers::Embedding& u,
      const components::containers::Embedding& v);
};

}  // namespace mediapipe::tasks::audio::audio_embedder
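The removed `AudioEmbedder::CosineSimilarity` member simply forwarded to the shared utility in `mediapipe/tasks/cc/components/utils/cosine_similarity`. For orientation, here is a rough, simplified sketch of what that utility computes for float embeddings: dot(u, v) / (|u| * |v|), which is invalid when sizes differ or either norm is zero. The container type here is a plain vector for illustration only, not the real `Embedding` proto:

```c++
#include <cmath>
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Cosine similarity of two float embeddings; mirrors the error conditions
// documented above (mismatched sizes, zero L2-norm).
absl::StatusOr<double> CosineSimilaritySketch(const std::vector<float>& u,
                                              const std::vector<float>& v) {
  if (u.size() != v.size() || u.empty()) {
    return absl::InvalidArgumentError(
        "Embeddings must have the same, non-zero size.");
  }
  double dot = 0, norm_u = 0, norm_v = 0;
  for (size_t i = 0; i < u.size(); ++i) {
    dot += u[i] * v[i];
    norm_u += u[i] * u[i];
    norm_v += v[i] * v[i];
  }
  if (norm_u == 0 || norm_v == 0) {
    return absl::InvalidArgumentError(
        "Cannot compute cosine similarity for embeddings with L2-norm 0.");
  }
  return dot / std::sqrt(norm_u * norm_v);
}
```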
@@ -54,8 +54,6 @@ constexpr char kModelWithMetadata[] = "yamnet_embedding_metadata.tflite";
constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav";
constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav";
constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav";
constexpr float kSpeechSimilarities[] = {0.985359, 0.994349, 0.993227, 0.996658,
                                         0.996384};
constexpr int kMilliSecondsPerSecond = 1000;
constexpr int kYamnetNumOfAudioSamples = 15600;
constexpr int kYamnetAudioSampleRate = 16000;
@@ -163,15 +161,9 @@ TEST_F(EmbedTest, SucceedsWithSameAudioAtDifferentSampleRates) {
                          audio_embedder->Embed(audio_buffer1, 16000));
  MP_ASSERT_OK_AND_ASSIGN(auto result2,
                          audio_embedder->Embed(audio_buffer2, 48000));
  int expected_size = sizeof(kSpeechSimilarities) / sizeof(float);
  int expected_size = 5;
  ASSERT_EQ(result1.size(), expected_size);
  ASSERT_EQ(result2.size(), expected_size);
  for (int i = 0; i < expected_size; ++i) {
    MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
                                                   result1[i].embeddings[0],
                                                   result2[i].embeddings[0]));
    EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
  }
  MP_EXPECT_OK(audio_embedder->Close());
}

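Note that the removed `sizeof(kSpeechSimilarities) / sizeof(float)` idiom derived the expected result count from the reference array rather than hard-coding it. A C++17 alternative (an assumption, not what this test uses) would be `std::size`:

```c++
#include <iterator>

constexpr float kSpeechSimilarities[] = {0.985359, 0.994349, 0.993227, 0.996658,
                                         0.996384};
// std::size is constexpr and avoids the sizeof/sizeof division.
constexpr int kExpectedSize = static_cast<int>(std::size(kSpeechSimilarities));
static_assert(kExpectedSize == 5, "Five embedding results are expected.");
```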
@@ -192,10 +184,6 @@ TEST_F(EmbedTest, SucceedsWithDifferentAudios) {
      audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate));
  ASSERT_EQ(result1.size(), 5);
  ASSERT_EQ(result2.size(), 1);
  MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
                                                 result1[0].embeddings[0],
                                                 result2[0].embeddings[0]));
  EXPECT_NEAR(similarity, 0.09017f, 1e-6);
  MP_EXPECT_OK(audio_embedder->Close());
}

@@ -258,15 +246,9 @@ TEST_F(EmbedAsyncTest, SucceedsWithSameAudioAtDifferentSampleRates) {
  RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1);
  std::vector<AudioEmbedderResult> result2;
  RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2);
  int expected_size = sizeof(kSpeechSimilarities) / sizeof(float);
  int expected_size = 5;
  ASSERT_EQ(result1.size(), expected_size);
  ASSERT_EQ(result2.size(), expected_size);
  for (int i = 0; i < expected_size; ++i) {
    MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
                                                   result1[i].embeddings[0],
                                                   result2[i].embeddings[0]));
    EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6);
  }
}

TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
@@ -276,10 +258,6 @@ TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) {
  RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2);
  ASSERT_EQ(result1.size(), 5);
  ASSERT_EQ(result2.size(), 1);
  MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity(
                                                 result1[0].embeddings[0],
                                                 result2[0].embeddings[0]));
  EXPECT_NEAR(similarity, 0.09017f, 1e-6);
}

}  // namespace
@@ -185,15 +185,15 @@ TEST_P(CalibrationWithoutIndicesTest, Succeeds) {

INSTANTIATE_TEST_SUITE_P(
    ScoreCalibrationCalculatorTest, CalibrationWithoutIndicesTest,
    Values(CalibrationTestParams{.score_transformation = "IDENTITY",
                                 .expected_results = {0.4948505976,
                                                      0.5059588508, 0.2, 0.2}},
    Values(CalibrationTestParams{
               /* score_transformation= */ "IDENTITY",
               /* expected_results= */ {0.4948505976, 0.5059588508, 0.2, 0.2}},
           CalibrationTestParams{
               .score_transformation = "LOG",
               .expected_results = {0.2976901255, 0.3393665735, 0.2, 0.2}},
               /* score_transformation= */ "LOG",
               /* expected_results= */ {0.2976901255, 0.3393665735, 0.2, 0.2}},
           CalibrationTestParams{
               .score_transformation = "INVERSE_LOGISTIC",
               .expected_results = {0.3203217641, 0.3778080605, 0.2, 0.2}}),
               /* score_transformation= */ "INVERSE_LOGISTIC",
               /* expected_results= */ {0.3203217641, 0.3778080605, 0.2, 0.2}}),
    [](const TestParamInfo<CalibrationWithoutIndicesTest::ParamType>& info) {
      return info.param.score_transformation;
    });
@@ -17,6 +17,7 @@ limitations under the License.
#define MEDIAPIPE_TASKS_CC_COMPONENTS_CONTAINERS_LANDMARK_H_

#include <cstdlib>
#include <optional>
#include <string>
#include <vector>

@@ -332,9 +332,11 @@ cc_library(
        "//mediapipe/tasks:internal",
    ],
    deps = [
        ":external_file_handler",
        "//mediapipe/calculators/core:flow_limiter_calculator_cc_proto",
        "//mediapipe/framework:calculator_cc_proto",
        "//mediapipe/framework/api2:builder",
        "//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
        "//mediapipe/tasks/metadata:metadata_schema_cc",
        "@com_google_absl//absl/strings",
        "@flatbuffers//:runtime_cc",
@@ -375,6 +377,5 @@ cc_test(
        "//mediapipe/tasks/cc:common",
        "//mediapipe/tasks/cc/core/proto:external_file_cc_proto",
        "//mediapipe/tasks/cc/metadata/utils:zip_utils",
        "@org_tensorflow//tensorflow/lite/c:common",
    ],
)
@@ -29,7 +29,7 @@ limitations under the License.
#include <windows.h>
#else
#include <unistd.h>
#endif
#endif  // _WIN32

#include <memory>
#include <string>
@@ -102,9 +102,13 @@ absl::StatusOr<std::string> PathToResourceAsFile(std::string path) {
#else
  if (absl::StartsWith(path, "./")) {
    path = "mediapipe" + path.substr(1);
  } else if (path[0] != '/') {
    path = "mediapipe/" + path;
  }

  std::string error;
  // TODO: We should ideally use `CreateForTests` when this is
  // accessed from unit tests.
  std::unique_ptr<::bazel::tools::cpp::runfiles::Runfiles> runfiles(
      ::bazel::tools::cpp::runfiles::Runfiles::Create("", &error));
  if (!runfiles) {
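The `PathToResourceAsFile` change above normalizes relative resource paths before the runfiles lookup: `./foo` becomes `mediapipe/foo`, and any other non-absolute path gains a `mediapipe/` prefix. A small standalone sketch of just that normalization step (simplified; the real function continues with the `Runfiles` resolution shown above):

```c++
#include <string>

#include "absl/strings/match.h"

// Normalizes a resource path the way the hunk above does before the
// bazel runfiles lookup. Absolute paths are left untouched.
std::string NormalizeRunfilesPath(std::string path) {
  if (absl::StartsWith(path, "./")) {
    path = "mediapipe" + path.substr(1);
  } else if (!path.empty() && path[0] != '/') {
    path = "mediapipe/" + path;
  }
  return path;
}
```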
@@ -88,6 +88,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFile) {
          .status());
}

#ifndef _WIN32
TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
  const int model_file_descriptor = open(kTestModelBundlePath, O_RDONLY);
  auto model_file = std::make_unique<proto::ExternalFile>();
@@ -103,6 +104,7 @@ TEST(ModelAssetBundleResourcesTest, CreateFromFileDescriptor) {
      model_bundle_resources->GetModelFile("dummy_gesture_recognizer.tflite")
          .status());
}
#endif  // _WIN32

TEST(ModelAssetBundleResourcesTest, CreateFromFilePointer) {
  auto file_content = LoadBinaryContent(kTestModelBundlePath);
@@ -136,6 +136,7 @@ TEST_F(ModelResourcesTest, CreateFromFile) {
  CheckModelResourcesPackets(model_resources.get());
}

#ifndef _WIN32
TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
  const int model_file_descriptor = open(kTestModelPath, O_RDONLY);
  auto model_file = std::make_unique<proto::ExternalFile>();
@@ -145,6 +146,7 @@ TEST_F(ModelResourcesTest, CreateFromFileDescriptor) {
      ModelResources::Create(kTestModelResourcesTag, std::move(model_file)));
  CheckModelResourcesPackets(model_resources.get());
}
#endif  // _WIN32

TEST_F(ModelResourcesTest, CreateFromInvalidFile) {
  auto model_file = std::make_unique<proto::ExternalFile>();
@@ -168,6 +170,15 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
  auto status_or_model_resources =
      ModelResources::Create(kTestModelResourcesTag, std::move(model_file));

#ifdef _WIN32
  EXPECT_EQ(status_or_model_resources.status().code(),
            absl::StatusCode::kFailedPrecondition);
  EXPECT_THAT(
      status_or_model_resources.status().message(),
      testing::HasSubstr("File descriptors are not supported on Windows."));
  AssertStatusHasMediaPipeTasksStatusCode(status_or_model_resources.status(),
                                          MediaPipeTasksStatus::kFileReadError);
#else
  EXPECT_EQ(status_or_model_resources.status().code(),
            absl::StatusCode::kInvalidArgument);
  EXPECT_THAT(
@@ -176,6 +187,7 @@ TEST_F(ModelResourcesTest, CreateFromInvalidFileDescriptor) {
  AssertStatusHasMediaPipeTasksStatusCode(
      status_or_model_resources.status(),
      MediaPipeTasksStatus::kInvalidArgumentError);
#endif  // _WIN32
}

TEST_F(ModelResourcesTest, CreateFailWithCorruptedFile) {
@@ -23,6 +23,8 @@ limitations under the License.
#include "absl/strings/string_view.h"
#include "flatbuffers/flatbuffers.h"
#include "mediapipe/calculators/core/flow_limiter_calculator.pb.h"
#include "mediapipe/tasks/cc/core/external_file_handler.h"
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"

namespace mediapipe {
namespace tasks {
@@ -34,13 +36,11 @@ constexpr char kFlowLimiterCalculatorName[] = "FlowLimiterCalculator";
}  // namespace

std::string LoadBinaryContent(const char* filename) {
  std::ifstream input_file(filename, std::ios::binary | std::ios::ate);
  // Find buffer size from input file, and load the buffer.
  size_t buffer_size = input_file.tellg();
  std::string buffer(buffer_size, '\0');
  input_file.seekg(0, std::ios::beg);
  input_file.read(const_cast<char*>(buffer.c_str()), buffer_size);
  return buffer;
  proto::ExternalFile external_file;
  external_file.set_file_name(filename);
  auto file_handler =
      ExternalFileHandler::CreateFromExternalFile(&external_file);
  return std::string{(*file_handler)->GetFileContent()};
}

int FindTensorIndexByMetadataName(
@@ -16,6 +16,7 @@ cc_test(
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:status",
        "//mediapipe/tasks/cc:common",
        "//mediapipe/tasks/cc/core:utils",
        "//mediapipe/tasks/cc/metadata:metadata_extractor",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
@@ -25,12 +25,14 @@ limitations under the License.
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/tasks/cc/common.h"
#include "mediapipe/tasks/cc/core/utils.h"

namespace mediapipe {
namespace tasks {
namespace metadata {
namespace {

using core::LoadBinaryContent;
using ::testing::Optional;

constexpr char kTestDataDirectory[] = "mediapipe/tasks/testdata/metadata";
@@ -53,8 +55,8 @@ constexpr char kRandomTextFile[] = "external_file";

absl::StatusOr<std::unique_ptr<ModelMetadataExtractor>> CreateMetadataExtractor(
    std::string model_name, std::string* file_contents) {
  MP_RETURN_IF_ERROR(file::GetContents(
      file::JoinPath("./", kTestDataDirectory, model_name), file_contents));
  *file_contents = LoadBinaryContent(
      file::JoinPath("./", kTestDataDirectory, model_name).c_str());
  return ModelMetadataExtractor::CreateFromModelBuffer(file_contents->data(),
                                                       file_contents->length());
}
@@ -26,7 +26,11 @@ using ::testing::MatchesRegex;

TEST(MetadataParserTest, MatadataParserVersionIsWellFormed) {
  // Validates that the version is well-formed (x.y.z).
#ifdef _WIN32
  EXPECT_THAT(kMatadataParserVersion, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
#else
  EXPECT_THAT(kMatadataParserVersion, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
#endif  // _WIN32
}

}  // namespace
@@ -83,7 +83,11 @@ TEST(MetadataVersionTest,
                                            builder.GetSize(), &min_version),
            kTfLiteOk);
  // Validates that the version is well-formed (x.y.z).
#ifdef _WIN32
  EXPECT_THAT(min_version, MatchesRegex("\\d+\\.\\d+\\.\\d+"));
#else
  EXPECT_THAT(min_version, MatchesRegex("[0-9]+\\.[0-9]+\\.[0-9]+"));
#endif  // _WIN32
}

TEST(MetadataVersionTest,
mediapipe/tasks/cc/vision/face_geometry/calculators/BUILD (new file, 49 lines)
@@ -0,0 +1,49 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")

licenses(["notice"])

package(default_visibility = ["//mediapipe/tasks:internal"])

mediapipe_proto_library(
    name = "geometry_pipeline_calculator_proto",
    srcs = ["geometry_pipeline_calculator.proto"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
    ],
)

cc_library(
    name = "geometry_pipeline_calculator",
    srcs = ["geometry_pipeline_calculator.cc"],
    deps = [
        ":geometry_pipeline_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:statusor",
        "//mediapipe/tasks/cc/vision/face_geometry/libs:geometry_pipeline",
        "//mediapipe/tasks/cc/vision/face_geometry/libs:validation_utils",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
        "//mediapipe/util:resource_util",
        "@com_google_absl//absl/memory",
    ],
    alwayslink = 1,
)
@@ -0,0 +1,194 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/util/resource_util.h"

namespace mediapipe::tasks::vision::face_geometry {
namespace {

static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
static constexpr char kImageSizeTag[] = "IMAGE_SIZE";
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS";

using ::mediapipe::tasks::vision::face_geometry::proto::Environment;
using ::mediapipe::tasks::vision::face_geometry::proto::FaceGeometry;
using ::mediapipe::tasks::vision::face_geometry::proto::
    GeometryPipelineMetadata;

// A calculator that renders a visual effect for multiple faces.
//
// Inputs:
//   IMAGE_SIZE (`std::pair<int, int>`, required):
//     The size of the current frame. The first element of the pair is the frame
//     width; the other one is the frame height.
//
//     The face landmarks should have been detected on a frame with the same
//     ratio. If used as-is, the resulting face geometry visualization should be
//     happening on a frame with the same ratio as well.
//
//   MULTI_FACE_LANDMARKS (`std::vector<NormalizedLandmarkList>`, required):
//     A vector of face landmark lists.
//
// Input side packets:
//   ENVIRONMENT (`proto::Environment`, required)
//     Describes an environment; includes the camera frame origin point location
//     as well as virtual camera parameters.
//
// Output:
//   MULTI_FACE_GEOMETRY (`std::vector<FaceGeometry>`, required):
//     A vector of face geometry data.
//
// Options:
//   metadata_path (`string`, optional):
//     Defines a path for the geometry pipeline metadata file.
//
//     The geometry pipeline metadata file format must be the binary
//     `GeometryPipelineMetadata` proto.
//
class GeometryPipelineCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->InputSidePackets().Tag(kEnvironmentTag).Set<Environment>();
    cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
    cc->Inputs()
        .Tag(kMultiFaceLandmarksTag)
        .Set<std::vector<mediapipe::NormalizedLandmarkList>>();
    cc->Outputs().Tag(kMultiFaceGeometryTag).Set<std::vector<FaceGeometry>>();

    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override {
    cc->SetOffset(mediapipe::TimestampDiff(0));

    const auto& options = cc->Options<FaceGeometryPipelineCalculatorOptions>();

    ASSIGN_OR_RETURN(
        GeometryPipelineMetadata metadata,
        ReadMetadataFromFile(options.metadata_path()),
        _ << "Failed to read the geometry pipeline metadata from file!");

    MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
        << "Invalid geometry pipeline metadata!";

    const Environment& environment =
        cc->InputSidePackets().Tag(kEnvironmentTag).Get<Environment>();

    MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
        << "Invalid environment!";

    ASSIGN_OR_RETURN(geometry_pipeline_,
                     CreateGeometryPipeline(environment, metadata),
                     _ << "Failed to create a geometry pipeline!");

    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    // Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required
    // to have a non-empty packet. In case this requirement is not met, there's
    // nothing to be processed at the current timestamp.
    if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() ||
        cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) {
      return absl::OkStatus();
    }

    const auto& image_size =
        cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
    const auto& multi_face_landmarks =
        cc->Inputs()
            .Tag(kMultiFaceLandmarksTag)
            .Get<std::vector<mediapipe::NormalizedLandmarkList>>();

    auto multi_face_geometry = absl::make_unique<std::vector<FaceGeometry>>();

    ASSIGN_OR_RETURN(
        *multi_face_geometry,
        geometry_pipeline_->EstimateFaceGeometry(
            multi_face_landmarks,  //
            /*frame_width*/ image_size.first,
            /*frame_height*/ image_size.second),
        _ << "Failed to estimate face geometry for multiple faces!");

    cc->Outputs()
        .Tag(kMultiFaceGeometryTag)
        .AddPacket(mediapipe::Adopt<std::vector<FaceGeometry>>(
                       multi_face_geometry.release())
                       .At(cc->InputTimestamp()));

    return absl::OkStatus();
  }

  absl::Status Close(CalculatorContext* cc) override {
    return absl::OkStatus();
  }

 private:
  static absl::StatusOr<GeometryPipelineMetadata> ReadMetadataFromFile(
      const std::string& metadata_path) {
    ASSIGN_OR_RETURN(std::string metadata_blob,
                     ReadContentBlobFromFile(metadata_path),
                     _ << "Failed to read a metadata blob from file!");

    GeometryPipelineMetadata metadata;
    RET_CHECK(metadata.ParseFromString(metadata_blob))
        << "Failed to parse a metadata proto from a binary blob!";

    return metadata;
  }

  static absl::StatusOr<std::string> ReadContentBlobFromFile(
      const std::string& unresolved_path) {
    ASSIGN_OR_RETURN(std::string resolved_path,
                     mediapipe::PathToResourceAsFile(unresolved_path),
                     _ << "Failed to resolve path! Path = " << unresolved_path);

    std::string content_blob;
    MP_RETURN_IF_ERROR(
        mediapipe::GetResourceContents(resolved_path, &content_blob))
        << "Failed to read content blob! Resolved path = " << resolved_path;

    return content_blob;
  }

  std::unique_ptr<GeometryPipeline> geometry_pipeline_;
};

}  // namespace

using FaceGeometryPipelineCalculator = GeometryPipelineCalculator;

REGISTER_CALCULATOR(
    ::mediapipe::tasks::vision::face_geometry::FaceGeometryPipelineCalculator);

}  // namespace mediapipe::tasks::vision::face_geometry
@@ -0,0 +1,27 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.tasks.vision.face_geometry;

import "mediapipe/framework/calculator_options.proto";

message FaceGeometryPipelineCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional FaceGeometryPipelineCalculatorOptions ext = 512499200;
  }

  optional string metadata_path = 1;
}
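Taken together, the calculator contract above and this options proto are enough to wire the new node into a graph. Below is a minimal sketch using the api2 graph builder; the stream and side-packet tags come from the contract, while the graph-level stream names, the metadata path, and the unqualified registered calculator name are assumptions for illustration (the registered name may need to be namespace-qualified):

```c++
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator.pb.h"

mediapipe::CalculatorGraphConfig BuildFaceGeometryGraph() {
  mediapipe::api2::builder::Graph graph;

  // Node name and metadata path are illustrative assumptions.
  auto& node = graph.AddNode("FaceGeometryPipelineCalculator");
  node.GetOptions<mediapipe::tasks::vision::face_geometry::
                      FaceGeometryPipelineCalculatorOptions>()
      .set_metadata_path(
          "mediapipe/tasks/cc/vision/face_geometry/data/"
          "geometry_pipeline_metadata_landmarks.binarypb");

  // Tags mirror the calculator contract: two input streams, one side packet,
  // one output stream.
  graph.In("IMAGE_SIZE") >> node.In("IMAGE_SIZE");
  graph.In("MULTI_FACE_LANDMARKS") >> node.In("MULTI_FACE_LANDMARKS");
  graph.SideIn("ENVIRONMENT") >> node.SideIn("ENVIRONMENT");
  node.Out("MULTI_FACE_GEOMETRY") >> graph.Out("MULTI_FACE_GEOMETRY");

  return graph.GetConfig();
}
```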
							
								
								
									
mediapipe/tasks/cc/vision/face_geometry/data/BUILD (new file, 59 lines)
@@ -0,0 +1,59 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

encode_binary_proto(
    name = "geometry_pipeline_metadata_detection",
    input = "geometry_pipeline_metadata_detection.pbtxt",
    message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
    output = "geometry_pipeline_metadata_detection.binarypb",
    deps = [
        "//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
    ],
)

encode_binary_proto(
    name = "geometry_pipeline_metadata_landmarks",
    input = "geometry_pipeline_metadata_landmarks.pbtxt",
    message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
    output = "geometry_pipeline_metadata_landmarks.binarypb",
    deps = [
        "//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
    ],
)

# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from
# the `geometry_pipeline_metadata_landmarks.pbtxt` definition.
encode_binary_proto(
    name = "geometry_pipeline_metadata",
    input = "geometry_pipeline_metadata_landmarks.pbtxt",
    message_type = "mediapipe.tasks.vision.face_geometry.proto.GeometryPipelineMetadata",
    output = "geometry_pipeline_metadata.binarypb",
    deps = [
        "//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_proto",
    ],
)

# These canonical face model files are not meant to be used in runtime, but rather for asset
# creation and/or reference.
exports_files([
    "canonical_face_model.fbx",
    "canonical_face_model.obj",
    "canonical_face_model_uv_visualization.png",
])
Binary file not shown.
File diff suppressed because it is too large.
Binary file not shown.
(New image added, 731 KiB.)
@@ -0,0 +1,78 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

input_source: FACE_DETECTION_PIPELINE
procrustes_landmark_basis { landmark_id: 0 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 1 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 2 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 3 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 4 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 5 weight: 1.0 }
# NOTE: the triangular topology of the face meshes is only useful when derived
#       from the 468 face landmarks, not from the 6 face detection landmarks
#       (keypoints). The former don't cover the entire face and this mesh is
#       defined here only to comply with the API. It should be considered as
#       a placeholder and/or for debugging purposes.
#
#       Use the face geometry derived from the face detection landmarks
#       (keypoints) for the face pose transformation matrix, not the mesh.
canonical_mesh: {
  vertex_type: VERTEX_PT
  primitive_type: TRIANGLE
  vertex_buffer: -3.1511454582214355
  vertex_buffer: 2.6246179342269897
  vertex_buffer: 3.4656630754470825
  vertex_buffer: 0.349575996398926
  vertex_buffer: 0.38137748837470997
  vertex_buffer: 3.1511454582214355
  vertex_buffer: 2.6246179342269897
  vertex_buffer: 3.4656630754470825
  vertex_buffer: 0.650443494319916
  vertex_buffer: 0.38137999176979054
  vertex_buffer: 0.0
  vertex_buffer: -1.126865029335022
  vertex_buffer: 7.475604057312012
  vertex_buffer: 0.500025987625122
  vertex_buffer: 0.547487020492554
  vertex_buffer: 0.0
  vertex_buffer: -4.304508209228516
  vertex_buffer: 4.162498950958252
  vertex_buffer: 0.499989986419678
  vertex_buffer: 0.694203019142151
  vertex_buffer: -7.664182186126709
  vertex_buffer: 0.673132002353668
  vertex_buffer: -2.435867071151733
  vertex_buffer: 0.007561000064015
  vertex_buffer: 0.480777025222778
  vertex_buffer: 7.664182186126709
  vertex_buffer: 0.673132002353668
  vertex_buffer: -2.435867071151733
  vertex_buffer: 0.992439985275269
  vertex_buffer: 0.480777025222778
  index_buffer: 0
  index_buffer: 1
  index_buffer: 2
  index_buffer: 1
  index_buffer: 5
  index_buffer: 2
  index_buffer: 4
  index_buffer: 0
  index_buffer: 2
  index_buffer: 4
  index_buffer: 2
  index_buffer: 3
  index_buffer: 2
  index_buffer: 5
  index_buffer: 3
}
(File diff suppressed because it is too large.)

mediapipe/tasks/cc/vision/face_geometry/libs/BUILD (new file, 80 lines)
@@ -0,0 +1,80 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "geometry_pipeline",
    srcs = ["geometry_pipeline.cc"],
    hdrs = ["geometry_pipeline.h"],
    deps = [
        ":mesh_3d_utils",
        ":procrustes_solver",
        ":validation_utils",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:matrix",
        "//mediapipe/framework/formats:matrix_data_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:statusor",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
        "@com_google_absl//absl/memory",
        "@eigen_archive//:eigen3",
    ],
)

cc_library(
    name = "mesh_3d_utils",
    srcs = ["mesh_3d_utils.cc"],
    hdrs = ["mesh_3d_utils.h"],
    deps = [
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:statusor",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
    ],
)

cc_library(
    name = "procrustes_solver",
    srcs = ["procrustes_solver.cc"],
    hdrs = ["procrustes_solver.h"],
    deps = [
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:statusor",
        "@com_google_absl//absl/memory",
        "@eigen_archive//:eigen3",
    ],
)

cc_library(
    name = "validation_utils",
    srcs = ["validation_utils.cc"],
    hdrs = ["validation_utils.h"],
    deps = [
        ":mesh_3d_utils",
        "//mediapipe/framework/formats:matrix_data_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:environment_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:face_geometry_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:geometry_pipeline_metadata_cc_proto",
        "//mediapipe/tasks/cc/vision/face_geometry/proto:mesh_3d_cc_proto",
    ],
)
@@ -0,0 +1,471 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h"

#include <cmath>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

#include "Eigen/Core"
#include "absl/memory/memory.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"

namespace mediapipe::tasks::vision::face_geometry {
namespace {

struct PerspectiveCameraFrustum {
  // NOTE: all arguments must be validated prior to calling this constructor.
  PerspectiveCameraFrustum(const proto::PerspectiveCamera& perspective_camera,
                           int frame_width, int frame_height) {
    static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;

    const float height_at_near =
        2.f * perspective_camera.near() *
        std::tan(0.5f * kDegreesToRadians *
                 perspective_camera.vertical_fov_degrees());

    const float width_at_near = frame_width * height_at_near / frame_height;

    left = -0.5f * width_at_near;
    right = 0.5f * width_at_near;
    bottom = -0.5f * height_at_near;
    top = 0.5f * height_at_near;
    near = perspective_camera.near();
    far = perspective_camera.far();
  }

  float left;
  float right;
  float bottom;
  float top;
  float near;
  float far;
};
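As a side note on the frustum math above: the near-plane extents computed in the constructor are the standard pinhole relations, restated here for reference with `near` and the vertical field of view taken from the `PerspectiveCamera` proto:

$$h_{\text{near}} = 2 \cdot \text{near} \cdot \tan\!\left(\tfrac{1}{2}\,\text{vertical\_fov}\right), \qquad w_{\text{near}} = \frac{\text{frame\_width}}{\text{frame\_height}} \cdot h_{\text{near}}$$

The frustum is centered on the optical axis, so `left`/`right` are ∓/± `w_near / 2` and `bottom`/`top` are ∓/± `h_near / 2`.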

class ScreenToMetricSpaceConverter {
 public:
  ScreenToMetricSpaceConverter(
      proto::OriginPointLocation origin_point_location,  //
      proto::InputSource input_source,                   //
      Eigen::Matrix3Xf&& canonical_metric_landmarks,     //
      Eigen::VectorXf&& landmark_weights,                //
      std::unique_ptr<ProcrustesSolver> procrustes_solver)
      : origin_point_location_(origin_point_location),
        input_source_(input_source),
        canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
        landmark_weights_(std::move(landmark_weights)),
        procrustes_solver_(std::move(procrustes_solver)) {}

  // Converts `screen_landmark_list` into `metric_landmark_list` and estimates
  // the `pose_transform_mat`.
  //
  // Here's the algorithm summary:
  //
  // (1) Project X- and Y- screen landmark coordinates at the Z near plane.
  //
  // (2) Estimate a canonical-to-runtime landmark set scale by running the
  //     Procrustes solver using the screen runtime landmarks.
  //
  //     On this iteration, screen landmarks are used instead of unprojected
  //     metric landmarks as it is not safe to unproject due to the relative
  //     nature of the input screen landmark Z coordinate.
  //
  // (3) Use the canonical-to-runtime scale from (2) to unproject the screen
  //     landmarks. The result is referenced as "intermediate landmarks" because
  //     they are the first estimation of the resulting metric landmarks, but are
  //     not quite there yet.
  //
  // (4) Estimate a canonical-to-runtime landmark set scale by running the
  //     Procrustes solver using the intermediate runtime landmarks.
  //
  // (5) Use the product of the scale factors from (2) and (4) to unproject
  //     the screen landmarks the second time. This is the second and the final
  //     estimation of the metric landmarks.
  //
  // (6) Multiply each of the metric landmarks by the inverse pose
  //     transformation matrix to align the runtime metric face landmarks with
  //     the canonical metric face landmarks.
  //
  // Note: the input screen landmarks are in the left-handed coordinate system,
  //       however any metric landmarks - including the canonical metric
  //       landmarks, the final runtime metric landmarks and any intermediate
  //       runtime metric landmarks - are in the right-handed coordinate system.
  //
  //       To keep the logic correct, the landmark set handedness is changed any
  //       time the screen-to-metric semantic barrier is passed.
  absl::Status Convert(
      const mediapipe::NormalizedLandmarkList& screen_landmark_list,  //
      const PerspectiveCameraFrustum& pcf,                            //
      mediapipe::LandmarkList& metric_landmark_list,                  //
      Eigen::Matrix4f& pose_transform_mat) const {
    RET_CHECK_EQ(screen_landmark_list.landmark_size(),
                 canonical_metric_landmarks_.cols())
        << "The number of landmarks doesn't match the number passed upon "
           "initialization!";

    Eigen::Matrix3Xf screen_landmarks;
    ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);

    ProjectXY(pcf, screen_landmarks);
    const float depth_offset = screen_landmarks.row(2).mean();

    // 1st iteration: don't unproject XY because it's unsafe to do so due to
    //                the relative nature of the Z coordinate. Instead, run the
    //                first estimation on the projected XY and use that scale to
    //                unproject for the 2nd iteration.
    Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
    ChangeHandedness(intermediate_landmarks);

    ASSIGN_OR_RETURN(const float first_iteration_scale,
                     EstimateScale(intermediate_landmarks),
                     _ << "Failed to estimate first iteration scale!");

    // 2nd iteration: unproject XY using the scale from the 1st iteration.
    intermediate_landmarks = screen_landmarks;
    MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
                    intermediate_landmarks);
    UnprojectXY(pcf, intermediate_landmarks);
    ChangeHandedness(intermediate_landmarks);

    // For face detection input landmarks, re-write Z-coord from the canonical
    // landmarks.
    if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
      Eigen::Matrix4f intermediate_pose_transform_mat;
      MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
          canonical_metric_landmarks_, intermediate_landmarks,
          landmark_weights_, intermediate_pose_transform_mat))
          << "Failed to estimate pose transform matrix!";

      intermediate_landmarks.row(2) =
          (intermediate_pose_transform_mat *
           canonical_metric_landmarks_.colwise().homogeneous())
              .row(2);
    }
    ASSIGN_OR_RETURN(const float second_iteration_scale,
                     EstimateScale(intermediate_landmarks),
                     _ << "Failed to estimate second iteration scale!");

    // Use the total scale to unproject the screen landmarks.
    const float total_scale = first_iteration_scale * second_iteration_scale;
    MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
    UnprojectXY(pcf, screen_landmarks);
    ChangeHandedness(screen_landmarks);

    // At this point, screen landmarks are converted into metric landmarks.
    Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;

    MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
        canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
        pose_transform_mat))
        << "Failed to estimate pose transform matrix!";

    // For face detection input landmarks, re-write Z-coord from the canonical
    // landmarks and run the pose transform estimation again.
    if (input_source_ == proto::InputSource::FACE_DETECTION_PIPELINE) {
      metric_landmarks.row(2) =
          (pose_transform_mat *
           canonical_metric_landmarks_.colwise().homogeneous())
              .row(2);

      MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
          canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
          pose_transform_mat))
          << "Failed to estimate pose transform matrix!";
    }

    // Multiply each of the metric landmarks by the inverse pose
    // transformation matrix to align the runtime metric face landmarks with
    // the canonical metric face landmarks.
    metric_landmarks = (pose_transform_mat.inverse() *
                        metric_landmarks.colwise().homogeneous())
                           .topRows(3);

    ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);

    return absl::OkStatus();
  }

 private:
  void ProjectXY(const PerspectiveCameraFrustum& pcf,
                 Eigen::Matrix3Xf& landmarks) const {
    float x_scale = pcf.right - pcf.left;
    float y_scale = pcf.top - pcf.bottom;
    float x_translation = pcf.left;
    float y_translation = pcf.bottom;

    if (origin_point_location_ == proto::OriginPointLocation::TOP_LEFT_CORNER) {
      landmarks.row(1) = 1.f - landmarks.row(1).array();
    }

    landmarks =
        landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
    landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
  }

  absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
    Eigen::Matrix4f transform_mat;
    MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
        canonical_metric_landmarks_, landmarks, landmark_weights_,
        transform_mat))
        << "Failed to estimate canonical-to-runtime landmark set transform!";

    return transform_mat.col(0).norm();
  }
		||||
 | 
			
		||||
  static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
 | 
			
		||||
                              float depth_offset, float scale,
 | 
			
		||||
                              Eigen::Matrix3Xf& landmarks) {
 | 
			
		||||
    landmarks.row(2) =
 | 
			
		||||
        (landmarks.array().row(2) - depth_offset + pcf.near) / scale;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
 | 
			
		||||
                          Eigen::Matrix3Xf& landmarks) {
 | 
			
		||||
    landmarks.row(0) =
 | 
			
		||||
        landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
 | 
			
		||||
    landmarks.row(1) =
 | 
			
		||||
        landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
 | 
			
		||||
    landmarks.row(2) *= -1.f;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void ConvertLandmarkListToEigenMatrix(
 | 
			
		||||
      const mediapipe::NormalizedLandmarkList& landmark_list,
 | 
			
		||||
      Eigen::Matrix3Xf& eigen_matrix) {
 | 
			
		||||
    eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
 | 
			
		||||
    for (int i = 0; i < landmark_list.landmark_size(); ++i) {
 | 
			
		||||
      const auto& landmark = landmark_list.landmark(i);
 | 
			
		||||
      eigen_matrix(0, i) = landmark.x();
 | 
			
		||||
      eigen_matrix(1, i) = landmark.y();
 | 
			
		||||
      eigen_matrix(2, i) = landmark.z();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void ConvertEigenMatrixToLandmarkList(
 | 
			
		||||
      const Eigen::Matrix3Xf& eigen_matrix,
 | 
			
		||||
      mediapipe::LandmarkList& landmark_list) {
 | 
			
		||||
    landmark_list.Clear();
 | 
			
		||||
 | 
			
		||||
    for (int i = 0; i < eigen_matrix.cols(); ++i) {
 | 
			
		||||
      auto& landmark = *landmark_list.add_landmark();
 | 
			
		||||
      landmark.set_x(eigen_matrix(0, i));
 | 
			
		||||
      landmark.set_y(eigen_matrix(1, i));
 | 
			
		||||
      landmark.set_z(eigen_matrix(2, i));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const proto::OriginPointLocation origin_point_location_;
 | 
			
		||||
  const proto::InputSource input_source_;
 | 
			
		||||
  Eigen::Matrix3Xf canonical_metric_landmarks_;
 | 
			
		||||
  Eigen::VectorXf landmark_weights_;
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr<ProcrustesSolver> procrustes_solver_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class GeometryPipelineImpl : public GeometryPipeline {
 | 
			
		||||
 public:
 | 
			
		||||
  GeometryPipelineImpl(
 | 
			
		||||
      const proto::PerspectiveCamera& perspective_camera,  //
 | 
			
		||||
      const proto::Mesh3d& canonical_mesh,                 //
 | 
			
		||||
      uint32_t canonical_mesh_vertex_size,                 //
 | 
			
		||||
      uint32_t canonical_mesh_num_vertices,
 | 
			
		||||
      uint32_t canonical_mesh_vertex_position_offset,
 | 
			
		||||
      std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
 | 
			
		||||
      : perspective_camera_(perspective_camera),
 | 
			
		||||
        canonical_mesh_(canonical_mesh),
 | 
			
		||||
        canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
 | 
			
		||||
        canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
 | 
			
		||||
        canonical_mesh_vertex_position_offset_(
 | 
			
		||||
            canonical_mesh_vertex_position_offset),
 | 
			
		||||
        space_converter_(std::move(space_converter)) {}
 | 
			
		||||
 | 
			
		||||
  absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
 | 
			
		||||
      const std::vector<mediapipe::NormalizedLandmarkList>&
 | 
			
		||||
          multi_face_landmarks,
 | 
			
		||||
      int frame_width, int frame_height) const override {
 | 
			
		||||
    MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
 | 
			
		||||
        << "Invalid frame dimensions!";
 | 
			
		||||
 | 
			
		||||
    // Create a perspective camera frustum to be shared for geometry estimation
    // for each face.
 | 
			
		||||
    PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
 | 
			
		||||
                                 frame_height);
 | 
			
		||||
 | 
			
		||||
    std::vector<proto::FaceGeometry> multi_face_geometry;
 | 
			
		||||
 | 
			
		||||
    // From this point, the meaning of "face landmarks" is clarified further as
    // "screen face landmarks". This is done to distinguish them from "metric
    // face landmarks" derived during the face geometry estimation process.
 | 
			
		||||
    for (const mediapipe::NormalizedLandmarkList& screen_face_landmarks :
 | 
			
		||||
         multi_face_landmarks) {
 | 
			
		||||
      // A screen landmark list that is too compact will cause numerical
      // instabilities, so such faces are filtered out.
 | 
			
		||||
      if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // Convert the screen landmarks into the metric landmarks and get the pose
 | 
			
		||||
      // transformation matrix.
 | 
			
		||||
      mediapipe::LandmarkList metric_face_landmarks;
 | 
			
		||||
      Eigen::Matrix4f pose_transform_mat;
 | 
			
		||||
      MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
 | 
			
		||||
                                                   metric_face_landmarks,
 | 
			
		||||
                                                   pose_transform_mat))
 | 
			
		||||
          << "Failed to convert landmarks from the screen to the metric space!";
 | 
			
		||||
 | 
			
		||||
      // Pack geometry data for this face.
 | 
			
		||||
      proto::FaceGeometry face_geometry;
 | 
			
		||||
      proto::Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
 | 
			
		||||
      // Copy the canonical face mesh as the face geometry mesh.
 | 
			
		||||
      mutable_mesh->CopyFrom(canonical_mesh_);
 | 
			
		||||
      // Replace XYZ vertex mesh coordinates with the metric landmark positions.
 | 
			
		||||
      for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
 | 
			
		||||
        uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
 | 
			
		||||
                                        canonical_mesh_vertex_position_offset_;
 | 
			
		||||
 | 
			
		||||
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
 | 
			
		||||
                                        metric_face_landmarks.landmark(i).x());
 | 
			
		||||
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
 | 
			
		||||
                                        metric_face_landmarks.landmark(i).y());
 | 
			
		||||
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
 | 
			
		||||
                                        metric_face_landmarks.landmark(i).z());
 | 
			
		||||
      }
 | 
			
		||||
      // Populate the face pose transformation matrix.
 | 
			
		||||
      mediapipe::MatrixDataProtoFromMatrix(
 | 
			
		||||
          pose_transform_mat, face_geometry.mutable_pose_transform_matrix());
 | 
			
		||||
 | 
			
		||||
      multi_face_geometry.push_back(face_geometry);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return multi_face_geometry;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  static bool IsScreenLandmarkListTooCompact(
 | 
			
		||||
      const mediapipe::NormalizedLandmarkList& screen_landmarks) {
 | 
			
		||||
    float mean_x = 0.f;
 | 
			
		||||
    float mean_y = 0.f;
 | 
			
		||||
    for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
 | 
			
		||||
      const auto& landmark = screen_landmarks.landmark(i);
 | 
			
		||||
      mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
 | 
			
		||||
      mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    float max_sq_dist = 0.f;
 | 
			
		||||
    for (const auto& landmark : screen_landmarks.landmark()) {
 | 
			
		||||
      const float d_x = landmark.x() - mean_x;
 | 
			
		||||
      const float d_y = landmark.y() - mean_y;
 | 
			
		||||
      max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
 | 
			
		||||
    return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const proto::PerspectiveCamera perspective_camera_;
 | 
			
		||||
  const proto::Mesh3d canonical_mesh_;
 | 
			
		||||
  const uint32_t canonical_mesh_vertex_size_;
 | 
			
		||||
  const uint32_t canonical_mesh_num_vertices_;
 | 
			
		||||
  const uint32_t canonical_mesh_vertex_position_offset_;
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}  // namespace
 | 
			
		||||
 | 
			
		||||
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
 | 
			
		||||
    const proto::Environment& environment,
 | 
			
		||||
    const proto::GeometryPipelineMetadata& metadata) {
 | 
			
		||||
  MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
 | 
			
		||||
      << "Invalid environment!";
 | 
			
		||||
  MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
 | 
			
		||||
      << "Invalid geometry pipeline metadata!";
 | 
			
		||||
 | 
			
		||||
  const auto& canonical_mesh = metadata.canonical_mesh();
 | 
			
		||||
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
 | 
			
		||||
                               VertexComponent::POSITION))
 | 
			
		||||
      << "Canonical face mesh must have the `POSITION` vertex component!";
 | 
			
		||||
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
 | 
			
		||||
                               VertexComponent::TEX_COORD))
 | 
			
		||||
      << "Canonical face mesh must have the `TEX_COORD` vertex component!";
 | 
			
		||||
 | 
			
		||||
  uint32_t canonical_mesh_vertex_size =
 | 
			
		||||
      GetVertexSize(canonical_mesh.vertex_type());
 | 
			
		||||
  uint32_t canonical_mesh_num_vertices =
 | 
			
		||||
      canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
 | 
			
		||||
  uint32_t canonical_mesh_vertex_position_offset =
 | 
			
		||||
      GetVertexComponentOffset(canonical_mesh.vertex_type(),
 | 
			
		||||
                               VertexComponent::POSITION)
 | 
			
		||||
          .value();
 | 
			
		||||
 | 
			
		||||
  // Put the Procrustes landmark basis into Eigen matrices for easier access.
 | 
			
		||||
  Eigen::Matrix3Xf canonical_metric_landmarks =
 | 
			
		||||
      Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
 | 
			
		||||
  Eigen::VectorXf landmark_weights =
 | 
			
		||||
      Eigen::VectorXf::Zero(canonical_mesh_num_vertices);
 | 
			
		||||
 | 
			
		||||
  for (int i = 0; i < canonical_mesh_num_vertices; ++i) {
 | 
			
		||||
    uint32_t vertex_buffer_offset =
 | 
			
		||||
        canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;
 | 
			
		||||
 | 
			
		||||
    canonical_metric_landmarks(0, i) =
 | 
			
		||||
        canonical_mesh.vertex_buffer(vertex_buffer_offset);
 | 
			
		||||
    canonical_metric_landmarks(1, i) =
 | 
			
		||||
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
 | 
			
		||||
    canonical_metric_landmarks(2, i) =
 | 
			
		||||
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for (const proto::WeightedLandmarkRef& wlr :
 | 
			
		||||
       metadata.procrustes_landmark_basis()) {
 | 
			
		||||
    uint32_t landmark_id = wlr.landmark_id();
 | 
			
		||||
    landmark_weights(landmark_id) = wlr.weight();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::unique_ptr<GeometryPipeline> result =
 | 
			
		||||
      absl::make_unique<GeometryPipelineImpl>(
 | 
			
		||||
          environment.perspective_camera(), canonical_mesh,
 | 
			
		||||
          canonical_mesh_vertex_size, canonical_mesh_num_vertices,
 | 
			
		||||
          canonical_mesh_vertex_position_offset,
 | 
			
		||||
          absl::make_unique<ScreenToMetricSpaceConverter>(
 | 
			
		||||
              environment.origin_point_location(),
 | 
			
		||||
              metadata.input_source() == proto::InputSource::DEFAULT
 | 
			
		||||
                  ? proto::InputSource::FACE_LANDMARK_PIPELINE
 | 
			
		||||
                  : metadata.input_source(),
 | 
			
		||||
              std::move(canonical_metric_landmarks),
 | 
			
		||||
              std::move(landmark_weights),
 | 
			
		||||
              CreateFloatPrecisionProcrustesSolver()));
 | 
			
		||||
 | 
			
		||||
  return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
mediapipe/tasks/cc/vision/face_geometry/libs/geometry_pipeline.h (new file, 69 lines)
@@ -0,0 +1,69 @@
// Copyright 2023 The MediaPipe Authors.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
//      http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
 | 
			
		||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
 | 
			
		||||
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/framework/formats/landmark.pb.h"
 | 
			
		||||
#include "mediapipe/framework/port/statusor.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
 | 
			
		||||
 | 
			
		||||
namespace mediapipe::tasks::vision::face_geometry {
 | 
			
		||||
 | 
			
		||||
// Encapsulates a stateless estimator of facial geometry in a Metric space based
 | 
			
		||||
// on the normalized face landmarks in the Screen space.
 | 
			
		||||
class GeometryPipeline {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual ~GeometryPipeline() = default;
 | 
			
		||||
 | 
			
		||||
  // Estimates geometry data for multiple faces.
 | 
			
		||||
  //
 | 
			
		||||
  // Returns an error status if any of the passed arguments is invalid.
 | 
			
		||||
  //
 | 
			
		||||
  // The result contains face geometry data for a subset of the input faces;
  // geometry data for a face may be missing if estimating it from the
  // corresponding face landmark list would be numerically unstable (for
  // example, if the landmark list is too compact).
 | 
			
		||||
  //
 | 
			
		||||
  // Each face landmark list must have the same number of landmarks as was
 | 
			
		||||
  // passed upon initialization via the canonical face mesh (as a part of the
 | 
			
		||||
  // geometry pipeline metadata).
 | 
			
		||||
  //
 | 
			
		||||
  // Both `frame_width` and `frame_height` must be positive.
 | 
			
		||||
  virtual absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateFaceGeometry(
 | 
			
		||||
      const std::vector<mediapipe::NormalizedLandmarkList>&
 | 
			
		||||
          multi_face_landmarks,
 | 
			
		||||
      int frame_width, int frame_height) const = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Creates an instance of `GeometryPipeline`.
 | 
			
		||||
//
 | 
			
		||||
// Both `environment` and `metadata` must be valid (for details, please refer to
 | 
			
		||||
// the proto message definition comments and/or `validation_utils.h/cc`).
 | 
			
		||||
//
 | 
			
		||||
// Canonical face mesh (defined as a part of `metadata`) must have the
 | 
			
		||||
// `POSITION` and the `TEX_COORD` vertex components.
 | 
			
		||||
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
 | 
			
		||||
    const proto::Environment& environment,
 | 
			
		||||
    const proto::GeometryPipelineMetadata& metadata);
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
 | 
			
		||||
 | 
			
		||||
#endif  // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
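
For orientation, here is a minimal, hypothetical usage sketch of the API declared in this header. It assumes valid `proto::Environment` and `proto::GeometryPipelineMetadata` messages are already available (loading them is outside this file's scope), and that MediaPipe's status macros are included; the wrapper function name is illustrative only.

```c++
// Hypothetical helper, not part of this change: builds a pipeline and runs a
// single estimation pass over one frame's worth of face landmarks.
absl::StatusOr<std::vector<proto::FaceGeometry>> EstimateOnce(
    const proto::Environment& environment,
    const proto::GeometryPipelineMetadata& metadata,
    const std::vector<mediapipe::NormalizedLandmarkList>& multi_face_landmarks,
    int frame_width, int frame_height) {
  // CreateGeometryPipeline validates `environment` and `metadata` internally.
  ASSIGN_OR_RETURN(std::unique_ptr<GeometryPipeline> pipeline,
                   CreateGeometryPipeline(environment, metadata));
  // Faces whose landmark lists are too compact may be skipped in the result.
  return pipeline->EstimateFaceGeometry(multi_face_landmarks, frame_width,
                                        frame_height);
}
```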
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.cc (new file, 103 lines)
@@ -0,0 +1,103 @@
// Copyright 2023 The MediaPipe Authors.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
//      http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/framework/port/ret_check.h"
 | 
			
		||||
#include "mediapipe/framework/port/statusor.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
 | 
			
		||||
 | 
			
		||||
namespace mediapipe::tasks::vision::face_geometry {
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
bool HasVertexComponentVertexPT(VertexComponent vertex_component) {
 | 
			
		||||
  switch (vertex_component) {
 | 
			
		||||
    case VertexComponent::POSITION:
 | 
			
		||||
    case VertexComponent::TEX_COORD:
 | 
			
		||||
      return true;
 | 
			
		||||
 | 
			
		||||
    default:
 | 
			
		||||
      return false;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) {
 | 
			
		||||
  switch (vertex_component) {
 | 
			
		||||
    case VertexComponent::POSITION:
 | 
			
		||||
      return 3;
 | 
			
		||||
    case VertexComponent::TEX_COORD:
 | 
			
		||||
      return 2;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) {
 | 
			
		||||
  switch (vertex_component) {
 | 
			
		||||
    case VertexComponent::POSITION:
 | 
			
		||||
      return 0;
 | 
			
		||||
    case VertexComponent::TEX_COORD:
 | 
			
		||||
      return GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace
 | 
			
		||||
 | 
			
		||||
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type) {
 | 
			
		||||
  switch (vertex_type) {
 | 
			
		||||
    case proto::Mesh3d::VERTEX_PT:
 | 
			
		||||
      return GetVertexComponentSizeVertexPT(VertexComponent::POSITION) +
 | 
			
		||||
             GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type) {
 | 
			
		||||
  switch (primitive_type) {
 | 
			
		||||
    case proto::Mesh3d::TRIANGLE:
 | 
			
		||||
      return 3;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
 | 
			
		||||
                        VertexComponent vertex_component) {
 | 
			
		||||
  switch (vertex_type) {
 | 
			
		||||
    case proto::Mesh3d::VERTEX_PT:
 | 
			
		||||
      return HasVertexComponentVertexPT(vertex_component);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::StatusOr<uint32_t> GetVertexComponentOffset(
 | 
			
		||||
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
 | 
			
		||||
  RET_CHECK(HasVertexComponentVertexPT(vertex_component))
 | 
			
		||||
      << "A given vertex type doesn't have the requested component!";
 | 
			
		||||
 | 
			
		||||
  switch (vertex_type) {
 | 
			
		||||
    case proto::Mesh3d::VERTEX_PT:
 | 
			
		||||
      return GetVertexComponentOffsetVertexPT(vertex_component);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::StatusOr<uint32_t> GetVertexComponentSize(
 | 
			
		||||
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
 | 
			
		||||
  RET_CHECK(HasVertexComponentVertexPT(vertex_component))
 | 
			
		||||
      << "A given vertex type doesn't have the requested component!";
 | 
			
		||||
 | 
			
		||||
  switch (vertex_type) {
 | 
			
		||||
    case proto::Mesh3d::VERTEX_PT:
 | 
			
		||||
      return GetVertexComponentSizeVertexPT(vertex_component);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h (new file, 51 lines)
@@ -0,0 +1,51 @@
// Copyright 2023 The MediaPipe Authors.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
//      http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
 | 
			
		||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/framework/port/statusor.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
 | 
			
		||||
 | 
			
		||||
namespace mediapipe::tasks::vision::face_geometry {
 | 
			
		||||
 | 
			
		||||
enum class VertexComponent { POSITION, TEX_COORD };
 | 
			
		||||
 | 
			
		||||
std::size_t GetVertexSize(proto::Mesh3d::VertexType vertex_type);
 | 
			
		||||
 | 
			
		||||
std::size_t GetPrimitiveSize(proto::Mesh3d::PrimitiveType primitive_type);
 | 
			
		||||
 | 
			
		||||
bool HasVertexComponent(proto::Mesh3d::VertexType vertex_type,
 | 
			
		||||
                        VertexComponent vertex_component);
 | 
			
		||||
 | 
			
		||||
// Computes the vertex component offset.
 | 
			
		||||
//
 | 
			
		||||
// Returns an error status if a given vertex type doesn't have the requested
 | 
			
		||||
// component.
 | 
			
		||||
absl::StatusOr<uint32_t> GetVertexComponentOffset(
 | 
			
		||||
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
 | 
			
		||||
 | 
			
		||||
// Computes the vertex component size.
 | 
			
		||||
//
 | 
			
		||||
// Returns an error status if a given vertex type doesn't have the requested
 | 
			
		||||
// component.
 | 
			
		||||
absl::StatusOr<uint32_t> GetVertexComponentSize(
 | 
			
		||||
    proto::Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
 | 
			
		||||
 | 
			
		||||
#endif  // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
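
A small, hypothetical sketch of how these utilities are typically combined to address a vertex position inside the flat vertex buffer of a `VERTEX_PT` mesh; this mirrors the indexing done by the geometry pipeline above. The helper name is illustrative, and the status macros are assumed to be included.

```c++
// Hypothetical helper, not part of this change: reads the XYZ position of
// vertex `i` from a mesh whose vertex type carries a POSITION component.
absl::Status ReadVertexPosition(const proto::Mesh3d& mesh, int i,
                                float& x, float& y, float& z) {
  ASSIGN_OR_RETURN(
      uint32_t position_offset,
      GetVertexComponentOffset(mesh.vertex_type(), VertexComponent::POSITION));
  const std::size_t vertex_size = GetVertexSize(mesh.vertex_type());
  const uint32_t base =
      static_cast<uint32_t>(vertex_size) * i + position_offset;
  x = mesh.vertex_buffer(base);
  y = mesh.vertex_buffer(base + 1);
  z = mesh.vertex_buffer(base + 2);
  return absl::OkStatus();
}
```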
mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.cc (new file, 264 lines)
@@ -0,0 +1,264 @@
// Copyright 2023 The MediaPipe Authors.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
//      http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h"
 | 
			
		||||
 | 
			
		||||
#include <cmath>
 | 
			
		||||
#include <memory>
 | 
			
		||||
 | 
			
		||||
#include "Eigen/Dense"
 | 
			
		||||
#include "absl/memory/memory.h"
 | 
			
		||||
#include "mediapipe/framework/port/ret_check.h"
 | 
			
		||||
#include "mediapipe/framework/port/status.h"
 | 
			
		||||
#include "mediapipe/framework/port/status_macros.h"
 | 
			
		||||
#include "mediapipe/framework/port/statusor.h"
 | 
			
		||||
 | 
			
		||||
namespace mediapipe::tasks::vision::face_geometry {
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
class FloatPrecisionProcrustesSolver : public ProcrustesSolver {
 | 
			
		||||
 public:
 | 
			
		||||
  FloatPrecisionProcrustesSolver() = default;
 | 
			
		||||
 | 
			
		||||
  absl::Status SolveWeightedOrthogonalProblem(
 | 
			
		||||
      const Eigen::Matrix3Xf& source_points,  //
 | 
			
		||||
      const Eigen::Matrix3Xf& target_points,  //
 | 
			
		||||
      const Eigen::VectorXf& point_weights,
 | 
			
		||||
      Eigen::Matrix4f& transform_mat) const override {
 | 
			
		||||
    // Validate inputs.
 | 
			
		||||
    MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points))
 | 
			
		||||
        << "Failed to validate weighted orthogonal problem input points!";
 | 
			
		||||
    MP_RETURN_IF_ERROR(
 | 
			
		||||
        ValidatePointWeights(source_points.cols(), point_weights))
 | 
			
		||||
        << "Failed to validate weighted orthogonal problem point weights!";
 | 
			
		||||
 | 
			
		||||
    // Extract square root from the point weights.
 | 
			
		||||
    Eigen::VectorXf sqrt_weights = ExtractSquareRoot(point_weights);
 | 
			
		||||
 | 
			
		||||
    // Try to solve the WEOP problem.
 | 
			
		||||
    MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem(
 | 
			
		||||
        source_points, target_points, sqrt_weights, transform_mat))
 | 
			
		||||
        << "Failed to solve the WEOP problem!";
 | 
			
		||||
 | 
			
		||||
    return absl::OkStatus();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  static constexpr float kAbsoluteErrorEps = 1e-9f;
 | 
			
		||||
 | 
			
		||||
  static absl::Status ValidateInputPoints(
 | 
			
		||||
      const Eigen::Matrix3Xf& source_points,
 | 
			
		||||
      const Eigen::Matrix3Xf& target_points) {
 | 
			
		||||
    RET_CHECK_GT(source_points.cols(), 0)
 | 
			
		||||
        << "The number of source points must be positive!";
 | 
			
		||||
 | 
			
		||||
    RET_CHECK_EQ(source_points.cols(), target_points.cols())
 | 
			
		||||
        << "The number of source and target points must be equal!";
 | 
			
		||||
 | 
			
		||||
    return absl::OkStatus();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static absl::Status ValidatePointWeights(
 | 
			
		||||
      int num_points, const Eigen::VectorXf& point_weights) {
 | 
			
		||||
    RET_CHECK_GT(point_weights.size(), 0)
 | 
			
		||||
        << "The number of point weights must be positive!";
 | 
			
		||||
 | 
			
		||||
    RET_CHECK_EQ(point_weights.size(), num_points)
 | 
			
		||||
        << "The number of points and point weights must be equal!";
 | 
			
		||||
 | 
			
		||||
    float total_weight = 0.f;
 | 
			
		||||
    for (int i = 0; i < num_points; ++i) {
 | 
			
		||||
      RET_CHECK_GE(point_weights(i), 0.f)
 | 
			
		||||
          << "Each point weight must be non-negative!";
 | 
			
		||||
 | 
			
		||||
      total_weight += point_weights(i);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    RET_CHECK_GT(total_weight, kAbsoluteErrorEps)
 | 
			
		||||
        << "The total point weight is too small!";
 | 
			
		||||
 | 
			
		||||
    return absl::OkStatus();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static Eigen::VectorXf ExtractSquareRoot(
 | 
			
		||||
      const Eigen::VectorXf& point_weights) {
 | 
			
		||||
    Eigen::VectorXf sqrt_weights(point_weights);
 | 
			
		||||
    for (int i = 0; i < sqrt_weights.size(); ++i) {
 | 
			
		||||
      sqrt_weights(i) = std::sqrt(sqrt_weights(i));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return sqrt_weights;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into
 | 
			
		||||
  // a single 4x4 transformation matrix.
 | 
			
		||||
  static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s,
 | 
			
		||||
                                                const Eigen::Vector3f& t) {
 | 
			
		||||
    Eigen::Matrix4f result = Eigen::Matrix4f::Identity();
 | 
			
		||||
    result.leftCols(3).topRows(3) = r_and_s;
 | 
			
		||||
    result.col(3).topRows(3) = t;
 | 
			
		||||
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // The weighted problem is thoroughly addressed in Section 2.4 of:
  // D. Akca, Generalized Procrustes analysis and its applications
  // in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648
  //
  // Notable differences in the code presented here are:
  //
  //   * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q.
  //     Our W_p is diagonal (equal to diag(sqrt_weights^2)),
  //     so we can just set Q = diag(sqrt_weights) instead.
  //
  //   * In the paper, the problem is presented as
  //     (for W_k = I and W_p = transposed(Q) Q):
  //     || Q (c A T + j transposed(t) - B) || -> min.
  //
  //     We reformulate it as an equivalent minimization of the transpose's
  //     norm:
  //     || (c transposed(T) transposed(A) - transposed(B)) transposed(Q) || -> min,
  //     where transposed(A) and transposed(B) are the source and the target point
  //     clouds, respectively, c transposed(T) is the rotation+scaling R sought
  //     for, and Q is diag(sqrt_weights).
 | 
			
		||||
  //
 | 
			
		||||
  //     Most of the derivations are therefore transposed.
 | 
			
		||||
  //
 | 
			
		||||
  // Note: the output `transform_mat` argument is used instead of `StatusOr<>`
 | 
			
		||||
  // return type in order to avoid Eigen memory alignment issues. Details:
 | 
			
		||||
  // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
 | 
			
		||||
  static absl::Status InternalSolveWeightedOrthogonalProblem(
 | 
			
		||||
      const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets,
 | 
			
		||||
      const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) {
 | 
			
		||||
    // transposed(A_w).
 | 
			
		||||
    Eigen::Matrix3Xf weighted_sources =
 | 
			
		||||
        sources.array().rowwise() * sqrt_weights.array().transpose();
 | 
			
		||||
    // transposed(B_w).
 | 
			
		||||
    Eigen::Matrix3Xf weighted_targets =
 | 
			
		||||
        targets.array().rowwise() * sqrt_weights.array().transpose();
 | 
			
		||||
 | 
			
		||||
    // w = transposed(j_w) j_w.
 | 
			
		||||
    float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum();
 | 
			
		||||
 | 
			
		||||
    // Let C = (j_w transposed(j_w)) / (transposed(j_w) j_w).
    // Note that C = transposed(C), hence (I - C) = transposed(I - C).
    //
    // transposed(A_w) C = transposed(A_w) j_w transposed(j_w) / w =
    // (transposed(A_w) j_w) transposed(j_w) / w = c_w transposed(j_w),
    //
    // where c_w = transposed(A_w) j_w / w is a k x 1 vector calculated here:
 | 
			
		||||
    Eigen::Matrix3Xf twice_weighted_sources =
 | 
			
		||||
        weighted_sources.array().rowwise() * sqrt_weights.array().transpose();
 | 
			
		||||
    Eigen::Vector3f source_center_of_mass =
 | 
			
		||||
        twice_weighted_sources.rowwise().sum() / total_weight;
 | 
			
		||||
    // transposed((I - C) A_w) = transposed(A_w) (I - C) =
    // transposed(A_w) - transposed(A_w) C = transposed(A_w) - c_w transposed(j_w).
 | 
			
		||||
    Eigen::Matrix3Xf centered_weighted_sources =
 | 
			
		||||
        weighted_sources - source_center_of_mass * sqrt_weights.transpose();
 | 
			
		||||
 | 
			
		||||
    Eigen::Matrix3f rotation;
 | 
			
		||||
    MP_RETURN_IF_ERROR(ComputeOptimalRotation(
 | 
			
		||||
        weighted_targets * centered_weighted_sources.transpose(), rotation))
 | 
			
		||||
        << "Failed to compute the optimal rotation!";
 | 
			
		||||
    ASSIGN_OR_RETURN(
 | 
			
		||||
        float scale,
 | 
			
		||||
        ComputeOptimalScale(centered_weighted_sources, weighted_sources,
 | 
			
		||||
                            weighted_targets, rotation),
 | 
			
		||||
        _ << "Failed to compute the optimal scale!");
 | 
			
		||||
 | 
			
		||||
    // R = c transposed(T).
 | 
			
		||||
    Eigen::Matrix3f rotation_and_scale = scale * rotation;
 | 
			
		||||
 | 
			
		||||
    // Compute optimal translation for the weighted problem.
 | 
			
		||||
 | 
			
		||||
    // transposed(B_w - c A_w T) = transposed(B_w) - R transposed(A_w) in (54).
 | 
			
		||||
    const auto pointwise_diffs =
 | 
			
		||||
        weighted_targets - rotation_and_scale * weighted_sources;
 | 
			
		||||
    // Multiplication by j_w is a respectively weighted column sum.
 | 
			
		||||
    // (54) from the paper.
 | 
			
		||||
    const auto weighted_pointwise_diffs =
 | 
			
		||||
        pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose();
 | 
			
		||||
    Eigen::Vector3f translation =
 | 
			
		||||
        weighted_pointwise_diffs.rowwise().sum() / total_weight;
 | 
			
		||||
 | 
			
		||||
    transform_mat = CombineTransformMatrix(rotation_and_scale, translation);
 | 
			
		||||
 | 
			
		||||
    return absl::OkStatus();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // `design_matrix` is a transposed LHS of (51) in the paper.
 | 
			
		||||
  //
 | 
			
		||||
  // Note: the output `rotation` argument is used instead of `StatusOr<>`
 | 
			
		||||
  // return type in order to avoid Eigen memory alignment issues. Details:
 | 
			
		||||
  // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
 | 
			
		||||
  static absl::Status ComputeOptimalRotation(
 | 
			
		||||
      const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) {
 | 
			
		||||
    RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps)
 | 
			
		||||
        << "Design matrix norm is too small!";
 | 
			
		||||
 | 
			
		||||
    Eigen::JacobiSVD<Eigen::Matrix3f> svd(
 | 
			
		||||
        design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV);
 | 
			
		||||
 | 
			
		||||
    Eigen::Matrix3f postrotation = svd.matrixU();
 | 
			
		||||
    Eigen::Matrix3f prerotation = svd.matrixV().transpose();
 | 
			
		||||
 | 
			
		||||
    // Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1),
 | 
			
		||||
    // see "4.6 Constrained orthogonal Procrustes problems"
 | 
			
		||||
    // in Gower & Dijksterhuis's book "Procrustes Analysis".
 | 
			
		||||
    // We flip the sign of the least singular value along with a column in W.
 | 
			
		||||
    //
 | 
			
		||||
    // Note that now the sum of singular values doesn't work for scale
 | 
			
		||||
    // estimation due to this sign flip.
 | 
			
		||||
    if (postrotation.determinant() * prerotation.determinant() <
 | 
			
		||||
        static_cast<float>(0)) {
 | 
			
		||||
      postrotation.col(2) *= static_cast<float>(-1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Transposed (52) from the paper.
 | 
			
		||||
    rotation = postrotation * prerotation;
 | 
			
		||||
    return absl::OkStatus();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static absl::StatusOr<float> ComputeOptimalScale(
 | 
			
		||||
      const Eigen::Matrix3Xf& centered_weighted_sources,
 | 
			
		||||
      const Eigen::Matrix3Xf& weighted_sources,
 | 
			
		||||
      const Eigen::Matrix3Xf& weighted_targets,
 | 
			
		||||
      const Eigen::Matrix3f& rotation) {
 | 
			
		||||
    // transposed(T) transposed(A_w) (I - C).
 | 
			
		||||
    const auto rotated_centered_weighted_sources =
 | 
			
		||||
        rotation * centered_weighted_sources;
 | 
			
		||||
    // Use the identity trace(A B) = sum(A * B^T)
 | 
			
		||||
    // to avoid building large intermediate matrices (* is Hadamard product).
 | 
			
		||||
    // (53) from the paper.
 | 
			
		||||
    float numerator =
 | 
			
		||||
        rotated_centered_weighted_sources.cwiseProduct(weighted_targets).sum();
 | 
			
		||||
    float denominator =
 | 
			
		||||
        centered_weighted_sources.cwiseProduct(weighted_sources).sum();
 | 
			
		||||
 | 
			
		||||
    RET_CHECK_GT(denominator, kAbsoluteErrorEps)
 | 
			
		||||
        << "Scale expression denominator is too small!";
 | 
			
		||||
    RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps)
 | 
			
		||||
        << "Scale is too small!";
 | 
			
		||||
 | 
			
		||||
    return numerator / denominator;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}  // namespace
 | 
			
		||||
 | 
			
		||||
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver() {
 | 
			
		||||
  return absl::make_unique<FloatPrecisionProcrustesSolver>();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
mediapipe/tasks/cc/vision/face_geometry/libs/procrustes_solver.h (new file, 70 lines)
@@ -0,0 +1,70 @@
// Copyright 2023 The MediaPipe Authors.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
//      http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
 | 
			
		||||
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
 | 
			
		||||
 | 
			
		||||
#include <memory>
 | 
			
		||||
 | 
			
		||||
#include "Eigen/Dense"
 | 
			
		||||
#include "mediapipe/framework/port/status.h"
 | 
			
		||||
 | 
			
		||||
namespace mediapipe::tasks::vision::face_geometry {
 | 
			
		||||
 | 
			
		||||
// Encapsulates a stateless solver for the Weighted Extended Orthogonal
 | 
			
		||||
// Procrustes (WEOP) Problem, as defined in Section 2.4 of
 | 
			
		||||
// https://doi.org/10.3929/ethz-a-004656648.
 | 
			
		||||
//
 | 
			
		||||
// Given the source and the target point clouds, the algorithm estimates
 | 
			
		||||
// a 4x4 transformation matrix featuring the following semantic components:
 | 
			
		||||
//
 | 
			
		||||
//   * Uniform scale
 | 
			
		||||
//   * Rotation
 | 
			
		||||
//   * Translation
 | 
			
		||||
//
 | 
			
		||||
// The matrix maps the source point cloud into the target point cloud minimizing
 | 
			
		||||
// the Mean Squared Error.
 | 
			
		||||
class ProcrustesSolver {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual ~ProcrustesSolver() = default;
 | 
			
		||||
 | 
			
		||||
  // Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem.
 | 
			
		||||
  //
 | 
			
		||||
  // All `source_points`, `target_points` and `point_weights` must define the
 | 
			
		||||
  // same number of points. Elements of `point_weights` must be non-negative.
 | 
			
		||||
  //
 | 
			
		||||
  // A very small diameter of either point cloud will likely lead to numerical
  // instabilities and a failure to estimate the transformation.
  //
  // A very small total point weight will likewise lead to numerical
  // instabilities and a failure to estimate the transformation.
  //
  // Small point coordinate deviation in either point cloud will likely result
  // in a failure, as it makes the solution highly unstable, if solvable at all.
 | 
			
		||||
  //
 | 
			
		||||
  // Note: the output `transform_mat` argument is used instead of `StatusOr<>`
 | 
			
		||||
  // return type in order to avoid Eigen memory alignment issues. Details:
 | 
			
		||||
  // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
 | 
			
		||||
  virtual absl::Status SolveWeightedOrthogonalProblem(
 | 
			
		||||
      const Eigen::Matrix3Xf& source_points,  //
 | 
			
		||||
      const Eigen::Matrix3Xf& target_points,  //
 | 
			
		||||
      const Eigen::VectorXf& point_weights,   //
 | 
			
		||||
      Eigen::Matrix4f& transform_mat) const = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver();
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
 | 
			
		||||
 | 
			
		||||
#endif  // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
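
A brief, hypothetical usage sketch of the solver interface above, using uniform point weights. The `source` and `target` matrices are assumed 3xN inputs, the helper name is illustrative, and error handling uses the same MediaPipe status macros as the implementation; the output-argument style follows the Eigen alignment note in the header.

```c++
// Hypothetical helper, not part of this change: estimates the 4x4 transform
// that maps `source` onto `target` with all points weighted equally.
absl::Status AlignPointClouds(const Eigen::Matrix3Xf& source,
                              const Eigen::Matrix3Xf& target,
                              Eigen::Matrix4f& transform_mat) {
  std::unique_ptr<ProcrustesSolver> solver =
      CreateFloatPrecisionProcrustesSolver();
  const Eigen::VectorXf weights = Eigen::VectorXf::Ones(source.cols());
  MP_RETURN_IF_ERROR(solver->SolveWeightedOrthogonalProblem(
      source, target, weights, transform_mat));
  // `transform_mat` now carries the uniform scale, rotation, and translation
  // that map `source` into `target` in the least-squares sense.
  return absl::OkStatus();
}
```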
mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.cc (new file, 127 lines)
@@ -0,0 +1,127 @@
// Copyright 2023 The MediaPipe Authors.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
//      http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
 | 
			
		||||
#include "mediapipe/framework/formats/matrix_data.pb.h"
 | 
			
		||||
#include "mediapipe/framework/port/ret_check.h"
 | 
			
		||||
#include "mediapipe/framework/port/status.h"
 | 
			
		||||
#include "mediapipe/framework/port/status_macros.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/libs/mesh_3d_utils.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
 | 
			
		||||
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"
 | 
			
		||||
 | 
			
		||||
namespace mediapipe::tasks::vision::face_geometry {
 | 
			
		||||
 | 
			
		||||
absl::Status ValidatePerspectiveCamera(
 | 
			
		||||
    const proto::PerspectiveCamera& perspective_camera) {
 | 
			
		||||
  static constexpr float kAbsoluteErrorEps = 1e-9f;
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps)
 | 
			
		||||
      << "Near Z must be greater than 0 with a margin of 10^{-9}!";
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_GT(perspective_camera.far(),
 | 
			
		||||
               perspective_camera.near() + kAbsoluteErrorEps)
 | 
			
		||||
      << "Far Z must be greater than Near Z with a margin of 10^{-9}!";
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps)
 | 
			
		||||
      << "Vertical FOV must be positive with a margin of 10^{-9}!";
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps,
 | 
			
		||||
               180.f)
 | 
			
		||||
      << "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}";
 | 
			
		||||
 | 
			
		||||
  return absl::OkStatus();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::Status ValidateEnvironment(const proto::Environment& environment) {
 | 
			
		||||
  MP_RETURN_IF_ERROR(
 | 
			
		||||
      ValidatePerspectiveCamera(environment.perspective_camera()))
 | 
			
		||||
      << "Invalid perspective camera!";
 | 
			
		||||
 | 
			
		||||
  return absl::OkStatus();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d) {
 | 
			
		||||
  const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type());
 | 
			
		||||
  const std::size_t primitive_type = GetPrimitiveSize(mesh_3d.primitive_type());
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % vertex_size, 0)
 | 
			
		||||
      << "Vertex buffer size must a multiple of the vertex size!";
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_type, 0)
 | 
			
		||||
      << "Index buffer size must a multiple of the primitive size!";
 | 
			
		||||
 | 
			
		||||
  const int num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
 | 
			
		||||
  for (uint32_t idx : mesh_3d.index_buffer()) {
 | 
			
		||||
    RET_CHECK_LT(idx, num_vertices)
 | 
			
		||||
        << "All mesh indices must refer to an existing vertex!";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return absl::OkStatus();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry) {
 | 
			
		||||
  MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";
 | 
			
		||||
 | 
			
		||||
  static constexpr char kInvalid4x4MatrixMessage[] =
 | 
			
		||||
      "Pose transformation matrix must be a 4x4 matrix!";
 | 
			
		||||
 | 
			
		||||
  const mediapipe::MatrixData& pose_transform_matrix =
 | 
			
		||||
      face_geometry.pose_transform_matrix();
 | 
			
		||||
  RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
 | 
			
		||||
  RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
 | 
			
		||||
  RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
 | 
			
		||||
      << kInvalid4x4MatrixMessage;
 | 
			
		||||
 | 
			
		||||
  return absl::OkStatus();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::Status ValidateGeometryPipelineMetadata(
 | 
			
		||||
    const proto::GeometryPipelineMetadata& metadata) {
 | 
			
		||||
  MP_RETURN_IF_ERROR(ValidateMesh3d(metadata.canonical_mesh()))
 | 
			
		||||
      << "Invalid canonical mesh!";
 | 
			
		||||
 | 
			
		||||
  RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
      << "Procrustes landmark basis must be non-empty!";
 | 
			
		||||
 | 
			
		||||
  const int num_vertices =
 | 
			
		||||
      metadata.canonical_mesh().vertex_buffer_size() /
 | 
			
		||||
      GetVertexSize(metadata.canonical_mesh().vertex_type());
 | 
			
		||||
  for (const proto::WeightedLandmarkRef& wlr :
 | 
			
		||||
       metadata.procrustes_landmark_basis()) {
 | 
			
		||||
    RET_CHECK_LT(wlr.landmark_id(), num_vertices)
 | 
			
		||||
        << "All Procrustes basis indices must refer to an existing canonical "
 | 
			
		||||
           "mesh vertex!";
 | 
			
		||||
 | 
			
		||||
    RET_CHECK_GE(wlr.weight(), 0.f)
 | 
			
		||||
        << "All Procrustes basis landmarks must have a non-negative weight!";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return absl::OkStatus();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
 | 
			
		||||
  RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
 | 
			
		||||
  RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";
 | 
			
		||||
 | 
			
		||||
  return absl::OkStatus();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace mediapipe::tasks::vision::face_geometry
mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h (new file, 70 lines)
@@ -0,0 +1,70 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
#define MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_

#include "mediapipe/framework/port/status.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/environment.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata.pb.h"
#include "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.pb.h"

namespace mediapipe::tasks::vision::face_geometry {

// Validates `perspective_camera`.
//
// Near Z must be greater than 0 with a margin of `1e-9`.
// Far Z must be greater than Near Z with a margin of `1e-9`.
// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range
// edges.
absl::Status ValidatePerspectiveCamera(
    const proto::PerspectiveCamera& perspective_camera);

// Validates `environment`.
//
// Environment's perspective camera must be valid.
absl::Status ValidateEnvironment(const proto::Environment& environment);

// Validates `mesh_3d`.
//
// Mesh vertex buffer size must be a multiple of the vertex size.
// Mesh index buffer size must be a multiple of the primitive size.
// All mesh indices must reference an existing mesh vertex.
absl::Status ValidateMesh3d(const proto::Mesh3d& mesh_3d);

// Validates `face_geometry`.
//
// Face mesh must be valid.
// Face pose transformation matrix must be a 4x4 matrix.
absl::Status ValidateFaceGeometry(const proto::FaceGeometry& face_geometry);

// Validates `metadata`.
//
// Canonical face mesh must be valid.
// Procrustes landmark basis must be non-empty.
// All Procrustes basis indices must reference an existing canonical mesh
// vertex.
// All Procrustes basis landmarks must have a non-negative weight.
absl::Status ValidateGeometryPipelineMetadata(
    const proto::GeometryPipelineMetadata& metadata);

// Validates frame dimensions.
//
// Both frame width and frame height must be positive.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height);

}  // namespace mediapipe::tasks::vision::face_geometry

#endif  // MEDIAPIPE_TASKS_CC_VISION_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
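
A minimal sketch of how these validators might be called before the geometry pipeline runs. The call-site function is hypothetical and not part of this diff; only the `Validate*` declarations above are (the header path is inferred from the include guard):

```c++
#include "absl/status/status.h"
#include "mediapipe/tasks/cc/vision/face_geometry/libs/validation_utils.h"

namespace face_geometry = ::mediapipe::tasks::vision::face_geometry;

// Hypothetical call site: reject malformed metadata and frame sizes up front,
// so later stages can assume well-formed inputs.
absl::Status CheckInputs(
    const face_geometry::proto::GeometryPipelineMetadata& metadata,
    int frame_width, int frame_height) {
  absl::Status status =
      face_geometry::ValidateGeometryPipelineMetadata(metadata);
  if (!status.ok()) return status;
  return face_geometry::ValidateFrameDimensions(frame_width, frame_height);
}
```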

mediapipe/tasks/cc/vision/face_geometry/proto/BUILD (new file, 46 lines)
@@ -0,0 +1,46 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_proto_library(
    name = "environment_proto",
    srcs = ["environment.proto"],
)

mediapipe_proto_library(
    name = "face_geometry_proto",
    srcs = ["face_geometry.proto"],
    deps = [
        ":mesh_3d_proto",
        "//mediapipe/framework/formats:matrix_data_proto",
    ],
)

mediapipe_proto_library(
    name = "geometry_pipeline_metadata_proto",
    srcs = ["geometry_pipeline_metadata.proto"],
    deps = [
        ":mesh_3d_proto",
    ],
)

mediapipe_proto_library(
    name = "mesh_3d_proto",
    srcs = ["mesh_3d.proto"],
)

@@ -0,0 +1,84 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.tasks.vision.face_geometry.proto;

option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "EnvironmentProto";

// Defines the (0, 0) origin point location of the environment.
//
// The variation in the origin point location can be traced back to the memory
// layout of the camera video frame buffers.
//
// Usually, the memory layout for most CPU (and also some GPU) camera video
// frame buffers results in having the (0, 0) origin point located in the
// Top Left corner.
//
// On the contrary, the memory layout for most GPU camera video frame buffers
// results in having the (0, 0) origin point located in the Bottom Left corner.
//
// Let's consider the following example:
//
// (A) ---------------+
//               ___  |
//  |     (1)    | |  |
//  |     / \    | |  |
//  |    |---|===|-|  |
//  |    |---|   | |  |
//  |   /     \  | |  |
//  |  |       | | |  |
//  |  |  (2)  |=| |  |
//  |  |       | | |  |
//  |  |_______| |_|  |
//  |   |@| |@|  | |  |
//  | ___________|_|_ |
//                    |
// (B) ---------------+
//
// In this example, (1) and (2) have the same X coordinate regardless of the
// origin point location. However, having the origin point located at (A)
// (Top Left corner) results in (1) having a smaller Y coordinate if compared to
// (2). Similarly, having the origin point located at (B) (Bottom Left corner)
// results in (1) having a greater Y coordinate if compared to (2).
//
// Providing the correct origin point location for your environment and making
// sure all the input landmarks are in-sync with this location is crucial
// for receiving the correct output face geometry and visual renders.
enum OriginPointLocation {
  BOTTOM_LEFT_CORNER = 1;
  TOP_LEFT_CORNER = 2;
}

// The perspective camera is defined through its vertical FOV angle and the
// Z-clipping planes. The aspect ratio is a runtime variable for the face
// geometry module and should be provided alongside the face landmarks in order
// to estimate the face geometry on a given frame.
//
// More info on Perspective Cameras:
// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective
message PerspectiveCamera {
  // `0 < vertical_fov_degrees < 180`.
  optional float vertical_fov_degrees = 1;
  // `0 < near < far`.
  optional float near = 2;
  optional float far = 3;
}

message Environment {
  optional OriginPointLocation origin_point_location = 1;
  optional PerspectiveCamera perspective_camera = 2;
}
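
The PerspectiveCamera message above only pins down the vertical FOV and the Z-clipping planes; the aspect ratio arrives at runtime. A minimal sketch of the standard projection matrix those fields imply, following the songho.ca reference linked in the comment (helper name and column-major layout are assumptions, not part of the diff):

```c++
#include <cmath>

// Builds a standard OpenGL-style perspective projection matrix, stored
// column-major in a flat array of 16 floats, from the PerspectiveCamera
// fields plus a runtime aspect ratio. Illustrative only.
void BuildPerspectiveMatrix(float vertical_fov_degrees, float near_z,
                            float far_z, float aspect_ratio, float m[16]) {
  const float kDegreesToRadians = 3.14159265358979323846f / 180.f;
  const float f =
      1.f / std::tan(0.5f * vertical_fov_degrees * kDegreesToRadians);
  const float denom = 1.f / (near_z - far_z);

  for (int i = 0; i < 16; ++i) m[i] = 0.f;
  m[0] = f / aspect_ratio;               // X scale.
  m[5] = f;                              // Y scale.
  m[10] = (near_z + far_z) * denom;      // Z remap into clip space.
  m[11] = -1.f;                          // Perspective divide term.
  m[14] = 2.f * near_z * far_z * denom;  // Z translation.
}
```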

@@ -0,0 +1,60 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.tasks.vision.face_geometry.proto;

import "mediapipe/framework/formats/matrix_data.proto";
import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";

option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "FaceGeometryProto";

// Defines the face geometry pipeline estimation result format.
message FaceGeometry {
  // Defines a mesh surface for a face. The face mesh vertex IDs are the same as
  // the face landmark IDs.
  //
  // XYZ coordinates exist in the right-handed Metric 3D space configured by an
  // environment. UV coordinates are taken from the canonical face mesh model.
  //
  // XY coordinates are guaranteed to match the screen positions of
  // the input face landmarks after (1) being multiplied by the face pose
  // transformation matrix and then (2) being projected with a perspective
  // camera matrix of the same environment.
  //
  // NOTE: the triangular topology of the face mesh is only useful when derived
  //       from the 468 face landmarks, not from the 6 face detection landmarks
  //       (keypoints). The latter don't cover the entire face and this mesh is
  //       defined here only to comply with the API. It should be considered as
  //       a placeholder and/or for debugging purposes.
  //
  //       Use the face geometry derived from the face detection landmarks
  //       (keypoints) for the face pose transformation matrix, not the mesh.
  optional Mesh3d mesh = 1;

  // Defines a face pose transformation matrix, which provides mapping from
  // the static canonical face model to the runtime face. Tries to distinguish
  // a head pose change from a facial expression change and to only reflect the
  // former.
  //
  // Is a 4x4 matrix and contains only the following components:
  //   * Uniform scale
  //   * Rotation
  //   * Translation
  //
  // The last row is guaranteed to be `[0 0 0 1]`.
  optional mediapipe.MatrixData pose_transform_matrix = 2;
}
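
A sketch of steps (1) and (2) from the comment above: apply the 4x4 face pose transform, then a perspective camera matrix, then the perspective divide to reach normalized device coordinates. Matrix layout and helper names are assumptions; only the two-step order comes from the diff:

```c++
#include <array>

// Applies a 4x4 matrix (row-major, flat array of 16 floats) to a point in
// homogeneous coordinates.
std::array<float, 4> ApplyMatrix(const float m[16],
                                 const std::array<float, 4>& p) {
  std::array<float, 4> out = {0.f, 0.f, 0.f, 0.f};
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      out[row] += m[row * 4 + col] * p[col];
    }
  }
  return out;
}

// Pose transform, then projection, then perspective divide.
std::array<float, 3> ProjectVertex(const float pose_transform[16],
                                   const float projection[16], float x,
                                   float y, float z) {
  std::array<float, 4> p = ApplyMatrix(pose_transform, {x, y, z, 1.f});
  p = ApplyMatrix(projection, p);
  return {p[0] / p[3], p[1] / p[3], p[2] / p[3]};
}
```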

@@ -0,0 +1,63 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.tasks.vision.face_geometry.proto;

import "mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto";

option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "GeometryPipelineMetadataProto";

enum InputSource {
  DEFAULT = 0;  // FACE_LANDMARK_PIPELINE
  FACE_LANDMARK_PIPELINE = 1;
  FACE_DETECTION_PIPELINE = 2;
}

message WeightedLandmarkRef {
  // Defines the landmark ID. References an existing face landmark ID.
  optional uint32 landmark_id = 1;
  // Defines the landmark weight. The larger the weight, the more influence this
  // landmark has in the basis.
  //
  // Is positive.
  optional float weight = 2;
}

// Next field ID: 4
message GeometryPipelineMetadata {
  // Defines the source of the input landmarks to let the underlying geometry
  // pipeline adjust in order to produce the best results.
  //
  // Face landmark pipeline is expected to produce 3D landmarks with relative Z
  // coordinate, which is scaled as the X coordinate assuming the weak
  // perspective projection camera model.
  //
  // Face detection pipeline is expected to produce 2D landmarks with Z
  // coordinate being equal to 0.
  optional InputSource input_source = 3;
  // Defines a mesh surface for a canonical face. The canonical face mesh vertex
  // IDs are the same as the face landmark IDs.
  //
  // XYZ coordinates are defined in centimeter units.
  optional Mesh3d canonical_mesh = 1;
  // Defines a weighted landmark basis for running the Procrustes solver
  // algorithm inside the geometry pipeline.
  //
  // A good basis sets face landmark weights in a way to distinguish a head pose
  // change from a facial expression change and to only respond to the former.
  repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
}
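
For intuition about the weighted basis, here is a sketch of the first step of a weighted Procrustes-style alignment, the weighted centroid over the basis landmarks. The containers stand in for the repeated proto fields; the real solver lives inside the geometry pipeline and is not shown in this diff:

```c++
#include <cstddef>
#include <vector>

struct Landmark3d {
  float x, y, z;
};

// weights[i] plays the role of WeightedLandmarkRef::weight for the landmark
// whose ID is ids[i]. Illustrative only.
Landmark3d WeightedCentroid(const std::vector<Landmark3d>& landmarks,
                            const std::vector<int>& ids,
                            const std::vector<float>& weights) {
  Landmark3d c = {0.f, 0.f, 0.f};
  float total_weight = 0.f;
  for (size_t i = 0; i < ids.size(); ++i) {
    const Landmark3d& p = landmarks[ids[i]];
    c.x += weights[i] * p.x;
    c.y += weights[i] * p.y;
    c.z += weights[i] * p.z;
    total_weight += weights[i];
  }
  if (total_weight > 0.f) {
    c.x /= total_weight;
    c.y /= total_weight;
    c.z /= total_weight;
  }
  return c;
}
```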

mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d.proto (new file, 41 lines)
@@ -0,0 +1,41 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.tasks.vision.face_geometry.proto;

option java_package = "mediapipe.tasks.vision.facegeometry.proto";
option java_outer_classname = "Mesh3dProto";

message Mesh3d {
  enum VertexType {
    // Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV).
    VERTEX_PT = 0;
  }

  enum PrimitiveType {
    // Is defined by 3 indices: triangle vertex IDs.
    TRIANGLE = 0;
  }

  optional VertexType vertex_type = 1;
  optional PrimitiveType primitive_type = 2;
  // Vertex buffer size is a multiple of the vertex size (e.g., 5 for
  // VERTEX_PT).
  repeated float vertex_buffer = 3;
  // Index buffer size is a multiple of the primitive size (e.g., 3 for
  // TRIANGLE).
  repeated uint32 index_buffer = 4;
}
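
A sketch of how a Mesh3d with VERTEX_PT vertices (stride 5) and TRIANGLE primitives (stride 3) can be walked, matching the buffer-size rules stated in the comments above. Container types stand in for the repeated fields; this helper is illustrative only:

```c++
#include <cstdint>
#include <vector>

void VisitTriangles(const std::vector<float>& vertex_buffer,
                    const std::vector<uint32_t>& index_buffer) {
  constexpr int kVertexSize = 5;     // XYZ + UV for VERTEX_PT.
  constexpr int kPrimitiveSize = 3;  // Vertex IDs per TRIANGLE.
  for (size_t i = 0; i + kPrimitiveSize <= index_buffer.size();
       i += kPrimitiveSize) {
    for (int corner = 0; corner < kPrimitiveSize; ++corner) {
      const uint32_t vertex_id = index_buffer[i + corner];
      const float* v = &vertex_buffer[vertex_id * kVertexSize];
      // v[0..2] is the XYZ position, v[3..4] the UV texture coordinate.
      (void)v;
    }
  }
}
```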

mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD (new file, 108 lines)
@@ -0,0 +1,108 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")

licenses(["notice"])

package(default_visibility = ["//mediapipe/tasks:internal"])

mediapipe_proto_library(
    name = "tensors_to_image_calculator_proto",
    srcs = ["tensors_to_image_calculator.proto"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/gpu:gpu_origin_proto",
    ],
)

cc_library(
    name = "tensors_to_image_calculator",
    srcs = ["tensors_to_image_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    features = ["-layering_check"],  # allow depending on tensor_to_image_calculator_gpu_deps
    linkopts = select({
        "//mediapipe:apple": [
            "-framework CoreVideo",
            "-framework MetalKit",
        ],
        "//conditions:default": [],
    }),
    deps = [
        ":tensors_to_image_calculator_cc_proto",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_options_cc_proto",
        "//mediapipe/framework/api2:builder",
        "//mediapipe/framework/api2:node",
        "//mediapipe/framework/api2:packet",
        "//mediapipe/framework/api2:port",
        "//mediapipe/framework/formats:image",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:vector",
        "//mediapipe/gpu:gpu_origin_cc_proto",
    ] + select({
        "//mediapipe/gpu:disable_gpu": [],
        "//conditions:default": ["tensor_to_image_calculator_gpu_deps"],
    }),
    alwayslink = 1,
)

cc_library(
    name = "tensor_to_image_calculator_gpu_deps",
    visibility = ["//visibility:private"],
    deps = select({
        "//mediapipe:android": [
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gl_quad_renderer",
            "//mediapipe/gpu:gl_simple_shaders",
            "//mediapipe/gpu:gpu_buffer",
            "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
        ],
        "//mediapipe:ios": [
            "//mediapipe/gpu:MPPMetalHelper",
            "//mediapipe/gpu:MPPMetalUtil",
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
        ],
        "//mediapipe:macos": [],
        "//conditions:default": [
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gl_quad_renderer",
            "//mediapipe/gpu:gpu_buffer",
            "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:util",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
        ],
    }),
)

@@ -0,0 +1,439 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <vector>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/packet.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#if MEDIAPIPE_METAL_ENABLED
#import <CoreVideo/CoreVideo.h>
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>

#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
#import "mediapipe/gpu/MPPMetalHelper.h"
#else
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_quad_renderer.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_shader.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h"
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {
namespace tasks {
namespace {

using ::mediapipe::api2::Input;
using ::mediapipe::api2::Node;
using ::mediapipe::api2::Output;

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
using ::tflite::gpu::gl::GlProgram;
using ::tflite::gpu::gl::GlShader;
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };

// Commonly used to compute the number of blocks to launch in a kernel.
static int NumGroups(const int size, const int group_size) {  // NOLINT
  return (size + group_size - 1) / group_size;
}

}  // namespace

// Converts a MediaPipe tensor to a MediaPipe Image.
//
// Input streams:
//   TENSORS - std::vector<mediapipe::Tensor> that only contains one element.
//
// Output streams:
//   IMAGE - mediapipe::Image.
//
// TODO: Enable TensorsToImageCalculator to run on CPU.
class TensorsToImageCalculator : public Node {
 public:
  static constexpr Input<std::vector<Tensor>> kInputTensors{"TENSORS"};
  static constexpr Output<Image> kOutputImage{"IMAGE"};

  MEDIAPIPE_NODE_CONTRACT(kInputTensors, kOutputImage);

  static absl::Status UpdateContract(CalculatorContract* cc);
  absl::Status Open(CalculatorContext* cc);
  absl::Status Process(CalculatorContext* cc);
  absl::Status Close(CalculatorContext* cc);

 private:
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
  bool metal_initialized_ = false;
  MPPMetalHelper* gpu_helper_ = nullptr;
  id<MTLComputePipelineState> to_buffer_program_;

  absl::Status MetalSetup(CalculatorContext* cc);
  absl::Status MetalProcess(CalculatorContext* cc);
#else
  absl::Status GlSetup(CalculatorContext* cc);

  GlCalculatorHelper gl_helper_;

  bool gl_initialized_ = false;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
  std::unique_ptr<tflite::gpu::gl::GlProgram> gl_compute_program_;
  const tflite::gpu::uint3 workgroup_size_ = {8, 8, 1};
#else
  GLuint program_ = 0;
  std::unique_ptr<mediapipe::QuadRenderer> gl_renderer_;
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU
};
MEDIAPIPE_REGISTER_NODE(::mediapipe::tasks::TensorsToImageCalculator);

absl::Status TensorsToImageCalculator::UpdateContract(CalculatorContract* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
  MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#else
  return GlCalculatorHelper::UpdateContract(cc);
#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}

absl::Status TensorsToImageCalculator::Open(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
  gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
  RET_CHECK(gpu_helper_);
#else
  MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED

  return MetalProcess(cc);

#else

  return gl_helper_.RunInGlContext([this, cc]() -> absl::Status {
    if (!gl_initialized_) {
      MP_RETURN_IF_ERROR(GlSetup(cc));
      gl_initialized_ = true;
    }

    if (kInputTensors(cc).IsEmpty()) {
      return absl::OkStatus();
    }
    const auto& input_tensors = kInputTensors(cc).Get();
    RET_CHECK_EQ(input_tensors.size(), 1)
        << "Expect 1 input tensor, but have " << input_tensors.size();
    const int tensor_width = input_tensors[0].shape().dims[2];
    const int tensor_height = input_tensors[0].shape().dims[1];

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

    auto out_texture = std::make_unique<tflite::gpu::gl::GlTexture>();
    MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
        tflite::gpu::DataType::UINT8,  // GL_RGBA8
        {tensor_width, tensor_height}, out_texture.get()));

    const int output_index = 0;
    glBindImageTexture(output_index, out_texture->id(), 0, GL_FALSE, 0,
                       GL_WRITE_ONLY, GL_RGBA8);

    auto read_view = input_tensors[0].GetOpenGlBufferReadView();
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, read_view.name());

    const tflite::gpu::uint3 workload = {tensor_width, tensor_height, 1};
    const tflite::gpu::uint3 workgroups =
        tflite::gpu::DivideRoundUp(workload, workgroup_size_);

    glUseProgram(gl_compute_program_->id());
    glUniform2i(glGetUniformLocation(gl_compute_program_->id(), "out_size"),
                tensor_width, tensor_height);

    MP_RETURN_IF_ERROR(gl_compute_program_->Dispatch(workgroups));

    auto texture_buffer = mediapipe::GlTextureBuffer::Wrap(
        out_texture->target(), out_texture->id(), tensor_width, tensor_height,
        mediapipe::GpuBufferFormat::kBGRA32,
        [ptr = out_texture.release()](
            std::shared_ptr<mediapipe::GlSyncPoint> sync_token) mutable {
          delete ptr;
        });

    auto output =
        std::make_unique<mediapipe::GpuBuffer>(std::move(texture_buffer));
    kOutputImage(cc).Send(Image(*output));

#else

    if (!input_tensors[0].ready_as_opengl_texture_2d()) {
      (void)input_tensors[0].GetCpuReadView();
    }

    auto output_texture =
        gl_helper_.CreateDestinationTexture(tensor_width, tensor_height);
    gl_helper_.BindFramebuffer(output_texture);  // GL_TEXTURE0
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D,
                  input_tensors[0].GetOpenGlTexture2dReadView().name());

    MP_RETURN_IF_ERROR(gl_renderer_->GlRender(
        tensor_width, tensor_height, output_texture.width(),
        output_texture.height(), mediapipe::FrameScaleMode::kStretch,
        mediapipe::FrameRotation::kNone,
        /*flip_horizontal=*/false, /*flip_vertical=*/false,
        /*flip_texture=*/false));

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);

    auto output = output_texture.GetFrame<GpuBuffer>();
    kOutputImage(cc).Send(Image(*output));

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

    return mediapipe::OkStatus();
  });

#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}

absl::Status TensorsToImageCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
  gl_helper_.RunInGlContext([this] {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
    gl_compute_program_.reset();
#else
    if (program_) glDeleteProgram(program_);
    program_ = 0;
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
  });
#endif  // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
  return absl::OkStatus();
}

#if MEDIAPIPE_METAL_ENABLED

absl::Status TensorsToImageCalculator::MetalProcess(CalculatorContext* cc) {
  if (!metal_initialized_) {
    MP_RETURN_IF_ERROR(MetalSetup(cc));
    metal_initialized_ = true;
  }

  if (kInputTensors(cc).IsEmpty()) {
    return absl::OkStatus();
  }
  const auto& input_tensors = kInputTensors(cc).Get();
  RET_CHECK_EQ(input_tensors.size(), 1)
      << "Expect 1 input tensor, but have " << input_tensors.size();
  const int tensor_width = input_tensors[0].shape().dims[2];
  const int tensor_height = input_tensors[0].shape().dims[1];

  // TODO: Fix unused variable
  [[maybe_unused]] id<MTLDevice> device = gpu_helper_.mtlDevice;
  id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
  command_buffer.label = @"TensorsToImageCalculatorConvert";
  id<MTLComputeCommandEncoder> compute_encoder =
      [command_buffer computeCommandEncoder];
  [compute_encoder setComputePipelineState:to_buffer_program_];

  auto input_view =
      mediapipe::MtlBufferView::GetReadView(input_tensors[0], command_buffer);
  [compute_encoder setBuffer:input_view.buffer() offset:0 atIndex:0];

  mediapipe::GpuBuffer output =
      [gpu_helper_ mediapipeGpuBufferWithWidth:tensor_width
                                        height:tensor_height];
  id<MTLTexture> dst_texture = [gpu_helper_ metalTextureWithGpuBuffer:output];
  [compute_encoder setTexture:dst_texture atIndex:1];

  MTLSize threads_per_group = MTLSizeMake(8, 8, 1);
  MTLSize threadgroups =
      MTLSizeMake(NumGroups(tensor_width, 8), NumGroups(tensor_height, 8), 1);
  [compute_encoder dispatchThreadgroups:threadgroups
                  threadsPerThreadgroup:threads_per_group];
  [compute_encoder endEncoding];
  [command_buffer commit];

  kOutputImage(cc).Send(Image(output));
  return absl::OkStatus();
}

absl::Status TensorsToImageCalculator::MetalSetup(CalculatorContext* cc) {
  id<MTLDevice> device = gpu_helper_.mtlDevice;
  const std::string shader_source =
      R"(
  #include <metal_stdlib>

  using namespace metal;

  kernel void convertKernel(
      device float*                         in_buf   [[ buffer(0) ]],
      texture2d<float, access::read_write>  out_tex  [[ texture(1) ]],
      uint2                                 gid      [[ thread_position_in_grid ]]) {
        if (gid.x >= out_tex.get_width() || gid.y >= out_tex.get_height()) return;
        uint linear_index = 3 * (gid.y * out_tex.get_width() + gid.x);
        float4 out_value = float4(in_buf[linear_index], in_buf[linear_index + 1], in_buf[linear_index + 2], 1.0);
        out_tex.write(out_value, gid);
      }
  )";
  NSString* library_source =
      [NSString stringWithUTF8String:shader_source.c_str()];
  NSError* error = nil;
  id<MTLLibrary> library =
      [device newLibraryWithSource:library_source options:nullptr error:&error];
  RET_CHECK(library != nil) << "Couldn't create shader library "
                            << [[error localizedDescription] UTF8String];
  id<MTLFunction> kernel_func = nil;
  kernel_func = [library newFunctionWithName:@"convertKernel"];
  RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";
  to_buffer_program_ =
      [device newComputePipelineStateWithFunction:kernel_func error:&error];
  RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
      [[error localizedDescription] UTF8String];

  return mediapipe::OkStatus();
}

#endif  // MEDIAPIPE_METAL_ENABLED

#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
absl::Status TensorsToImageCalculator::GlSetup(CalculatorContext* cc) {
  std::string maybe_flip_y_define;
#if !defined(__APPLE__)
  const auto& options = cc->Options<TensorsToImageCalculatorOptions>();
  if (options.gpu_origin() != mediapipe::GpuOrigin::TOP_LEFT) {
    maybe_flip_y_define = R"(
      #define FLIP_Y_COORD
    )";
  }
#endif  // !defined(__APPLE__)

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  const std::string shader_header =
      absl::StrCat(tflite::gpu::gl::GetShaderHeader(workgroup_size_), R"(
    precision highp float;
    layout(rgba8, binding = 0) writeonly uniform highp image2D output_texture;
    uniform ivec2 out_size;
  )");

  const std::string shader_body = R"(
    layout(std430, binding = 2) readonly buffer B0 {
      float elements[];
    } input_data;   // data tensor

    void main() {
      int out_width = out_size.x;
      int out_height = out_size.y;

      ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
      if (gid.x >= out_width || gid.y >= out_height) { return; }
      int linear_index = 3 * (gid.y * out_width + gid.x);

#ifdef FLIP_Y_COORD
      int y_coord = out_height - gid.y - 1;
#else
      int y_coord = gid.y;
#endif  // defined(FLIP_Y_COORD)

      ivec2 out_coordinate = ivec2(gid.x, y_coord);
      vec4 out_value = vec4(input_data.elements[linear_index], input_data.elements[linear_index + 1], input_data.elements[linear_index + 2], 1.0);
      imageStore(output_texture, out_coordinate, out_value);
    })";

  const std::string shader_full =
      absl::StrCat(shader_header, maybe_flip_y_define, shader_body);

  GlShader shader;
  MP_RETURN_IF_ERROR(
      GlShader::CompileShader(GL_COMPUTE_SHADER, shader_full, &shader));
  gl_compute_program_ = std::make_unique<GlProgram>();
  MP_RETURN_IF_ERROR(
      GlProgram::CreateWithShader(shader, gl_compute_program_.get()));

#else
  constexpr GLchar kFragColorOutputDeclaration[] = R"(
  #ifdef GL_ES
    #define fragColor gl_FragColor
  #else
    out vec4 fragColor;
  #endif  // defined(GL_ES);
)";

  constexpr GLchar kBody[] = R"(
    DEFAULT_PRECISION(mediump, float)
    in vec2 sample_coordinate;
    uniform sampler2D tensor;
    void main() {
#ifdef FLIP_Y_COORD
      float y_coord = 1.0 - sample_coordinate.y;
#else
      float y_coord = sample_coordinate.y;
#endif  // defined(FLIP_Y_COORD)
      vec3 color = texture2D(tensor, vec2(sample_coordinate.x, y_coord)).rgb;
      fragColor = vec4(color, 1.0);
    }
  )";

  const std::string src =
      absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
                   kFragColorOutputDeclaration, maybe_flip_y_define, kBody);
  gl_renderer_ = std::make_unique<mediapipe::QuadRenderer>();
  MP_RETURN_IF_ERROR(gl_renderer_->GlSetup(src.c_str(), {"tensor"}));

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  return mediapipe::OkStatus();
}

#endif  // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED

}  // namespace tasks
}  // namespace mediapipe
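
A minimal api2 builder sketch of wiring this calculator into a graph using the TENSORS and IMAGE tags declared above. The registered node-name string is an assumption (namespaced calculators sometimes register under a dotted name), so treat this as illustrative rather than something verified against this change:

```c++
#include <vector>

#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/tensor.h"

using ::mediapipe::CalculatorGraphConfig;
using ::mediapipe::Image;
using ::mediapipe::Tensor;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Stream;

CalculatorGraphConfig BuildTensorsToImageGraph() {
  Graph graph;

  // Graph input: the single-element tensor vector expected on TENSORS.
  Stream<std::vector<Tensor>> tensors =
      graph.In("TENSORS").Cast<std::vector<Tensor>>();

  // Node name is assumed; adjust if the registered name is namespace-qualified.
  auto& tensors_to_image = graph.AddNode("TensorsToImageCalculator");
  tensors.ConnectTo(tensors_to_image.In("TENSORS"));

  // Expose the converted image as the graph output.
  Stream<Image> image = tensors_to_image.Out("IMAGE").Cast<Image>();
  image.ConnectTo(graph.Out("IMAGE"));

  return graph.GetConfig();
}
```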

@@ -0,0 +1,31 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.tasks;

import "mediapipe/framework/calculator.proto";
import "mediapipe/gpu/gpu_origin.proto";

message TensorsToImageCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional TensorsToImageCalculatorOptions ext = 511831156;
  }

  // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
  // to be flipped vertically as tensors are expected to start at top.
  // (DEFAULT or unset interpreted as CONVENTIONAL.)
  optional mediapipe.GpuOrigin.Mode gpu_origin = 1;
}
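
Continuing the hypothetical builder sketch shown after the calculator listing, the option above could be set like this to tell the calculator the tensor rows already start at the top, so no vertical flip is applied (helper and node variable are assumptions, not part of the diff):

```c++
#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h"

// Hypothetical helper: declare the input tensors as top-left origin so the
// GlSetup path above skips the FLIP_Y_COORD define.
void MarkTopLeftOrigin(mediapipe::api2::builder::GenericNode& node) {
  node.GetOptions<mediapipe::tasks::TensorsToImageCalculatorOptions>()
      .set_gpu_origin(mediapipe::GpuOrigin::TOP_LEFT);
}
```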

@@ -203,106 +203,111 @@ INSTANTIATE_TEST_CASE_P(
    CombinedPredictionCalculatorTests, CombinedPredictionCalculatorTest,
    testing::ValuesIn<CombinedPredictionCalculatorTestCase>({
        {
            .test_name = "TestCustomDramaWinnnerWith_HighCanned_Thresh",
            .custom_negative_score = 0.1,
            .drama_score = 0.5,
            .llama_score = 0.3,
            .drama_thresh = 0.25,
            .llama_thresh = 0.7,
            .canned_negative_score = 0.1,
            .bazinga_score = 0.3,
            .joy_score = 0.3,
            .peace_score = 0.3,
            .bazinga_thresh = 0.7,
            .joy_thresh = 0.7,
            .peace_thresh = 0.7,
            .max_scoring_label = "CustomDrama",
            .max_score = 0.5,
            /* test_name= */ "TestCustomDramaWinnnerWith_HighCanned_Thresh",
            /* custom_negative_score= */ 0.1,
            /* drama_score= */ 0.5,
            /* llama_score= */ 0.3,
            /* drama_thresh= */ 0.25,
            /* llama_thresh= */ 0.7,
            /* canned_negative_score= */ 0.1,
            /* bazinga_score= */ 0.3,
            /* joy_score= */ 0.3,
            /* peace_score= */ 0.3,
            /* bazinga_thresh= */ 0.7,
            /* joy_thresh= */ 0.7,
            /* peace_thresh= */ 0.7,
            /* max_scoring_label= */ "CustomDrama",
            /* max_score= */ 0.5,
        },
        {
            .test_name = "TestCannedWinnerWith_HighCustom_ZeroCanned_Thresh",
            .custom_negative_score = 0.1,
            .drama_score = 0.3,
            .llama_score = 0.6,
            .drama_thresh = 0.4,
            .llama_thresh = 0.8,
            .canned_negative_score = 0.1,
            .bazinga_score = 0.4,
            .joy_score = 0.3,
            .peace_score = 0.2,
            .bazinga_thresh = 0.0,
            .joy_thresh = 0.0,
            .peace_thresh = 0.0,
            .max_scoring_label = "CannedBazinga",
            .max_score = 0.4,
            /* test_name= */ "TestCannedWinnerWith_HighCustom_ZeroCanned_"
                             "Thresh",
            /* custom_negative_score= */ 0.1,
            /* drama_score= */ 0.3,
            /* llama_score= */ 0.6,
            /* drama_thresh= */ 0.4,
            /* llama_thresh= */ 0.8,
            /* canned_negative_score= */ 0.1,
            /* bazinga_score= */ 0.4,
            /* joy_score= */ 0.3,
            /* peace_score= */ 0.2,
            /* bazinga_thresh= */ 0.0,
            /* joy_thresh= */ 0.0,
            /* peace_thresh= */ 0.0,
            /* max_scoring_label= */ "CannedBazinga",
            /* max_score= */ 0.4,
        },
        {
            .test_name = "TestNegativeWinnerWith_LowCustom_HighCanned_Thresh",
            .custom_negative_score = 0.5,
            .drama_score = 0.1,
            .llama_score = 0.4,
            .drama_thresh = 0.1,
            .llama_thresh = 0.05,
            .canned_negative_score = 0.1,
            .bazinga_score = 0.3,
            .joy_score = 0.3,
            .peace_score = 0.3,
            .bazinga_thresh = 0.7,
            .joy_thresh = 0.7,
            .peace_thresh = 0.7,
            .max_scoring_label = "Negative",
            .max_score = 0.5,
            /* test_name= */ "TestNegativeWinnerWith_LowCustom_HighCanned_"
                             "Thresh",
            /* custom_negative_score= */ 0.5,
            /* drama_score= */ 0.1,
            /* llama_score= */ 0.4,
            /* drama_thresh= */ 0.1,
            /* llama_thresh= */ 0.05,
            /* canned_negative_score= */ 0.1,
            /* bazinga_score= */ 0.3,
            /* joy_score= */ 0.3,
            /* peace_score= */ 0.3,
            /* bazinga_thresh= */ 0.7,
            /* joy_thresh= */ 0.7,
            /* peace_thresh= */ 0.7,
            /* max_scoring_label= */ "Negative",
            /* max_score= */ 0.5,
        },
        {
            .test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh",
            .custom_negative_score = 0.8,
            .drama_score = 0.1,
            .llama_score = 0.1,
            .drama_thresh = 0.25,
            .llama_thresh = 0.7,
            .canned_negative_score = 0.1,
            .bazinga_score = 0.3,
            .joy_score = 0.3,
            .peace_score = 0.3,
            .bazinga_thresh = 0.7,
            .joy_thresh = 0.7,
            .peace_thresh = 0.7,
            .max_scoring_label = "Negative",
            .max_score = 0.8,
            /* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
                             "Thresh",
            /* custom_negative_score= */ 0.8,
            /* drama_score= */ 0.1,
            /* llama_score= */ 0.1,
            /* drama_thresh= */ 0.25,
            /* llama_thresh= */ 0.7,
            /* canned_negative_score= */ 0.1,
            /* bazinga_score= */ 0.3,
            /* joy_score= */ 0.3,
            /* peace_score= */ 0.3,
            /* bazinga_thresh= */ 0.7,
            /* joy_thresh= */ 0.7,
            /* peace_thresh= */ 0.7,
            /* max_scoring_label= */ "Negative",
            /* max_score= */ 0.8,
        },
        {
            .test_name = "TestNegativeWinnerWith_HighCustom_HighCannedThresh2",
            .custom_negative_score = 0.1,
            .drama_score = 0.2,
            .llama_score = 0.7,
            .drama_thresh = 1.1,
            .llama_thresh = 1.1,
            .canned_negative_score = 0.1,
            .bazinga_score = 0.3,
            .joy_score = 0.3,
            .peace_score = 0.3,
            .bazinga_thresh = 0.7,
            .joy_thresh = 0.7,
            .peace_thresh = 0.7,
            .max_scoring_label = "Negative",
            .max_score = 0.1,
            /* test_name= */ "TestNegativeWinnerWith_HighCustom_"
                             "HighCannedThresh2",
            /* custom_negative_score= */ 0.1,
            /* drama_score= */ 0.2,
            /* llama_score= */ 0.7,
            /* drama_thresh= */ 1.1,
            /* llama_thresh= */ 1.1,
            /* canned_negative_score= */ 0.1,
            /* bazinga_score= */ 0.3,
            /* joy_score= */ 0.3,
            /* peace_score= */ 0.3,
            /* bazinga_thresh= */ 0.7,
            /* joy_thresh= */ 0.7,
            /* peace_thresh= */ 0.7,
            /* max_scoring_label= */ "Negative",
            /* max_score= */ 0.1,
        },
        {
            .test_name = "TestNegativeWinnerWith_HighCustom_HighCanned_Thresh3",
            .custom_negative_score = 0.1,
            .drama_score = 0.3,
            .llama_score = 0.6,
            .drama_thresh = 0.4,
            .llama_thresh = 0.8,
            .canned_negative_score = 0.3,
            .bazinga_score = 0.2,
            .joy_score = 0.3,
            .peace_score = 0.2,
            .bazinga_thresh = 0.5,
            .joy_thresh = 0.5,
            .peace_thresh = 0.5,
            .max_scoring_label = "Negative",
            .max_score = 0.1,
            /* test_name= */ "TestNegativeWinnerWith_HighCustom_HighCanned_"
                             "Thresh3",
            /* custom_negative_score= */ 0.1,
            /* drama_score= */ 0.3,
            /* llama_score= */ 0.6,
            /* drama_thresh= */ 0.4,
            /* llama_thresh= */ 0.8,
            /* canned_negative_score= */ 0.3,
            /* bazinga_score= */ 0.2,
            /* joy_score= */ 0.3,
            /* peace_score= */ 0.2,
            /* bazinga_thresh= */ 0.5,
            /* joy_thresh= */ 0.5,
            /* peace_thresh= */ 0.5,
            /* max_scoring_label= */ "Negative",
            /* max_score= */ 0.1,
        },
    }),
    [](const testing::TestParamInfo<
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
@@ -117,24 +117,24 @@ TEST_P(Landmarks2dToMatrixCalculatorTest, OutputsCorrectResult) {
INSTANTIATE_TEST_CASE_P(
    LandmarksToMatrixCalculatorTests, Landmarks2dToMatrixCalculatorTest,
    testing::ValuesIn<Landmarks2dToMatrixCalculatorTestCase>(
        {{.test_name = "TestWithOffset0",
          .base_offset = 0,
          .object_normalization_origin_offset = 0,
          .expected_cell_0_2 = 0.1f,
          .expected_cell_1_5 = 0.1875f,
          .rotation = 0},
         {.test_name = "TestWithOffset21",
          .base_offset = 21,
          .object_normalization_origin_offset = 0,
          .expected_cell_0_2 = 0.1f,
          .expected_cell_1_5 = 0.1875f,
          .rotation = 0},
         {.test_name = "TestWithRotation",
          .base_offset = 0,
          .object_normalization_origin_offset = 0,
          .expected_cell_0_2 = 0.075f,
          .expected_cell_1_5 = -0.25f,
          .rotation = M_PI / 2.0}}),
        {{/* test_name= */ "TestWithOffset0",
          /* base_offset= */ 0,
          /* object_normalization_origin_offset= */ 0,
          /* expected_cell_0_2= */ 0.1f,
          /* expected_cell_1_5= */ 0.1875f,
          /* rotation= */ 0},
         {/* test_name= */ "TestWithOffset21",
          /* base_offset= */ 21,
          /* object_normalization_origin_offset= */ 0,
          /* expected_cell_0_2= */ 0.1f,
          /* expected_cell_1_5= */ 0.1875f,
          /* rotation= */ 0},
         {/* test_name= */ "TestWithRotation",
          /* base_offset= */ 0,
          /* object_normalization_origin_offset= */ 0,
          /* expected_cell_0_2= */ 0.075f,
          /* expected_cell_1_5= */ -0.25f,
          /* rotation= */ M_PI / 2.0}}),
    [](const testing::TestParamInfo<
        Landmarks2dToMatrixCalculatorTest::ParamType>& info) {
      return info.param.test_name;
@@ -203,30 +203,30 @@ TEST_P(LandmarksWorld3dToMatrixCalculatorTest, OutputsCorrectResult) {
INSTANTIATE_TEST_CASE_P(
    LandmarksToMatrixCalculatorTests, LandmarksWorld3dToMatrixCalculatorTest,
    testing::ValuesIn<LandmarksWorld3dToMatrixCalculatorTestCase>(
        {{.test_name = "TestWithOffset0",
          .base_offset = 0,
          .object_normalization_origin_offset = 0,
          .expected_cell_0_2 = 0.1f,
          .expected_cell_1_5 = 0.25,
          .rotation = 0},
         {.test_name = "TestWithOffset21",
          .base_offset = 21,
          .object_normalization_origin_offset = 0,
          .expected_cell_0_2 = 0.1f,
          .expected_cell_1_5 = 0.25,
          .rotation = 0},
         {.test_name = "NoObjectNormalization",
          .base_offset = 0,
          .object_normalization_origin_offset = -1,
          .expected_cell_0_2 = 0.021f,
          .expected_cell_1_5 = 0.052f,
          .rotation = 0},
         {.test_name = "TestWithRotation",
          .base_offset = 0,
          .object_normalization_origin_offset = 0,
          .expected_cell_0_2 = 0.1f,
          .expected_cell_1_5 = -0.25f,
          .rotation = M_PI / 2.0}}),
        {{/* test_name= */ "TestWithOffset0",
          /* base_offset= */ 0,
          /* object_normalization_origin_offset= */ 0,
          /* expected_cell_0_2= */ 0.1f,
          /* expected_cell_1_5= */ 0.25,
          /* rotation= */ 0},
         {/* test_name= */ "TestWithOffset21",
          /* base_offset= */ 21,
          /* object_normalization_origin_offset= */ 0,
          /* expected_cell_0_2= */ 0.1f,
          /* expected_cell_1_5= */ 0.25,
          /* rotation= */ 0},
         {/* test_name= */ "NoObjectNormalization",
          /* base_offset= */ 0,
          /* object_normalization_origin_offset= */ -1,
          /* expected_cell_0_2= */ 0.021f,
          /* expected_cell_1_5= */ 0.052f,
          /* rotation= */ 0},
         {/* test_name= */ "TestWithRotation",
          /* base_offset= */ 0,
          /* object_normalization_origin_offset= */ 0,
          /* expected_cell_0_2= */ 0.1f,
          /* expected_cell_1_5= */ -0.25f,
          /* rotation= */ M_PI / 2.0}}),
    [](const testing::TestParamInfo<
        LandmarksWorld3dToMatrixCalculatorTest::ParamType>& info) {
      return info.param.test_name;
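The test-parameter blocks above appear twice because the change rewrites each designated-initializer list into a positional-initializer list annotated with argument-name comments; the old and new forms are shown side by side in this diff. A minimal sketch of the rewrite, using a hypothetical struct rather than the real test-case types:

```c++
// Hypothetical struct for illustration only.
struct TestCase {
  const char* test_name;
  float max_score;
};

// Before: C++20 designated initializers.
constexpr TestCase kBefore{.test_name = "TestWithOffset0", .max_score = 0.5f};

// After: positional initializers with /* field= */ comments, as in the diff.
constexpr TestCase kAfter{/* test_name= */ "TestWithOffset0",
                          /* max_score= */ 0.5f};
```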
@@ -257,19 +257,28 @@ class HandDetectorGraph : public core::ModelTaskGraph {
    preprocessed_tensors >> inference.In("TENSORS");
    auto model_output_tensors = inference.Out("TENSORS");

    // TODO: support hand detection metadata.
    bool has_metadata = false;

    // Generates a single side packet containing a vector of SSD anchors.
    auto& ssd_anchor = graph.AddNode("SsdAnchorsCalculator");
    ConfigureSsdAnchorsCalculator(
        &ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>());
    auto& ssd_anchor_options =
        ssd_anchor.GetOptions<mediapipe::SsdAnchorsCalculatorOptions>();
    if (!has_metadata) {
      ConfigureSsdAnchorsCalculator(&ssd_anchor_options);
    }
    auto anchors = ssd_anchor.SideOut("");

    // Converts output tensors to Detections.
    auto& tensors_to_detections =
        graph.AddNode("TensorsToDetectionsCalculator");
    ConfigureTensorsToDetectionsCalculator(
        subgraph_options,
        &tensors_to_detections
             .GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
    if (!has_metadata) {
      ConfigureTensorsToDetectionsCalculator(
          subgraph_options,
          &tensors_to_detections
               .GetOptions<mediapipe::TensorsToDetectionsCalculatorOptions>());
    }

    model_output_tensors >> tensors_to_detections.In("TENSORS");
    anchors >> tensors_to_detections.SideIn("ANCHORS");
    auto detections = tensors_to_detections.Out("DETECTIONS");
@@ -148,6 +148,7 @@ cc_library(
        "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator",
        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto",
        "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto",
        "//mediapipe/util:graph_builder_utils",
    ],
    alwayslink = 1,
)
@@ -14,6 +14,7 @@ limitations under the License.
==============================================================================*/

#include <memory>
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>
@@ -41,6 +42,7 @@ limitations under the License.
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"
#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h"
#include "mediapipe/util/graph_builder_utils.h"

namespace mediapipe {
namespace tasks {
@@ -53,7 +55,7 @@ using ::mediapipe::NormalizedRect;
using ::mediapipe::api2::Input;
using ::mediapipe::api2::Output;
using ::mediapipe::api2::builder::Graph;
using ::mediapipe::api2::builder::Source;
using ::mediapipe::api2::builder::Stream;
using ::mediapipe::tasks::components::utils::DisallowIf;
using ::mediapipe::tasks::core::ModelAssetBundleResources;
using ::mediapipe::tasks::metadata::SetExternalFile;
@@ -78,40 +80,46 @@ constexpr char kHandLandmarksDetectorTFLiteName[] =
    "hand_landmarks_detector.tflite";

struct HandLandmarkerOutputs {
  Source<std::vector<NormalizedLandmarkList>> landmark_lists;
  Source<std::vector<LandmarkList>> world_landmark_lists;
  Source<std::vector<NormalizedRect>> hand_rects_next_frame;
  Source<std::vector<ClassificationList>> handednesses;
  Source<std::vector<NormalizedRect>> palm_rects;
  Source<std::vector<Detection>> palm_detections;
  Source<Image> image;
  Stream<std::vector<NormalizedLandmarkList>> landmark_lists;
  Stream<std::vector<LandmarkList>> world_landmark_lists;
  Stream<std::vector<NormalizedRect>> hand_rects_next_frame;
  Stream<std::vector<ClassificationList>> handednesses;
  Stream<std::vector<NormalizedRect>> palm_rects;
  Stream<std::vector<Detection>> palm_detections;
  Stream<Image> image;
};

// Sets the base options in the sub tasks.
absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
                                   HandLandmarkerGraphOptions* options,
                                   bool is_copy) {
  ASSIGN_OR_RETURN(const auto hand_detector_file,
                   resources.GetModelFile(kHandDetectorTFLiteName));
  auto* hand_detector_graph_options =
      options->mutable_hand_detector_graph_options();
  SetExternalFile(hand_detector_file,
                  hand_detector_graph_options->mutable_base_options()
                      ->mutable_model_asset(),
                  is_copy);
  if (!hand_detector_graph_options->base_options().has_model_asset()) {
    ASSIGN_OR_RETURN(const auto hand_detector_file,
                     resources.GetModelFile(kHandDetectorTFLiteName));
    SetExternalFile(hand_detector_file,
                    hand_detector_graph_options->mutable_base_options()
                        ->mutable_model_asset(),
                    is_copy);
  }
  hand_detector_graph_options->mutable_base_options()
      ->mutable_acceleration()
      ->CopyFrom(options->base_options().acceleration());
  hand_detector_graph_options->mutable_base_options()->set_use_stream_mode(
      options->base_options().use_stream_mode());
  ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
                   resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
  auto* hand_landmarks_detector_graph_options =
      options->mutable_hand_landmarks_detector_graph_options();
  SetExternalFile(hand_landmarks_detector_file,
                  hand_landmarks_detector_graph_options->mutable_base_options()
                      ->mutable_model_asset(),
                  is_copy);
  if (!hand_landmarks_detector_graph_options->base_options()
           .has_model_asset()) {
    ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file,
                     resources.GetModelFile(kHandLandmarksDetectorTFLiteName));
    SetExternalFile(
        hand_landmarks_detector_file,
        hand_landmarks_detector_graph_options->mutable_base_options()
            ->mutable_model_asset(),
        is_copy);
  }
  hand_landmarks_detector_graph_options->mutable_base_options()
      ->mutable_acceleration()
      ->CopyFrom(options->base_options().acceleration());
@@ -119,7 +127,6 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources,
      ->set_use_stream_mode(options->base_options().use_stream_mode());
  return absl::OkStatus();
}

}  // namespace

// A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand
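The `has_model_asset()` guards added above change SetSubTaskBaseOptions from always overwriting the sub-task model asset with the bundled file to filling it in only when the caller left it empty. A hedged sketch of what that enables; the C++ proto namespace and the ExternalFile `file_name` setter are assumptions inferred from the includes above, not shown in this diff:

```c++
#include <string>

#include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h"

namespace hl_proto = ::mediapipe::tasks::vision::hand_landmarker::proto;

// With the guard above, a palm-detector model pinned by the caller survives
// SetSubTaskBaseOptions instead of being replaced by the bundled
// hand_detector.tflite.
hl_proto::HandLandmarkerGraphOptions MakeOptionsWithCustomPalmDetector(
    const std::string& model_path) {
  hl_proto::HandLandmarkerGraphOptions options;
  options.mutable_hand_detector_graph_options()
      ->mutable_base_options()
      ->mutable_model_asset()
      ->set_file_name(model_path);
  return options;
}
```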
@@ -219,12 +226,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
          !sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService)
               .IsAvailable()));
    }
    Stream<Image> image_in = graph.In(kImageTag).Cast<Image>();
    std::optional<Stream<NormalizedRect>> norm_rect_in;
    if (HasInput(sc->OriginalNode(), kNormRectTag)) {
      norm_rect_in = graph.In(kNormRectTag).Cast<NormalizedRect>();
    }
    ASSIGN_OR_RETURN(
        auto hand_landmarker_outputs,
        BuildHandLandmarkerGraph(
            sc->Options<HandLandmarkerGraphOptions>(),
            graph[Input<Image>(kImageTag)],
            graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));
        BuildHandLandmarkerGraph(sc->Options<HandLandmarkerGraphOptions>(),
                                 image_in, norm_rect_in, graph));
    hand_landmarker_outputs.landmark_lists >>
        graph[Output<std::vector<NormalizedLandmarkList>>(kLandmarksTag)];
    hand_landmarker_outputs.world_landmark_lists >>
@@ -262,8 +272,8 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
  // image_in: (mediapipe::Image) stream to run hand landmark detection on.
  // graph: the mediapipe graph instance to be updated.
  absl::StatusOr<HandLandmarkerOutputs> BuildHandLandmarkerGraph(
      const HandLandmarkerGraphOptions& tasks_options, Source<Image> image_in,
      Source<NormalizedRect> norm_rect_in, Graph& graph) {
      const HandLandmarkerGraphOptions& tasks_options, Stream<Image> image_in,
      std::optional<Stream<NormalizedRect>> norm_rect_in, Graph& graph) {
    const int max_num_hands =
        tasks_options.hand_detector_graph_options().num_hands();

@@ -293,10 +303,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
      // track the hands from the last frame.
      auto image_for_hand_detector =
          DisallowIf(image_in, has_enough_hands, graph);
      auto norm_rect_in_for_hand_detector =
          DisallowIf(norm_rect_in, has_enough_hands, graph);
      std::optional<Stream<NormalizedRect>> norm_rect_in_for_hand_detector;
      if (norm_rect_in) {
        norm_rect_in_for_hand_detector =
            DisallowIf(norm_rect_in.value(), has_enough_hands, graph);
      }
      image_for_hand_detector >> hand_detector.In("IMAGE");
      norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT");
      if (norm_rect_in_for_hand_detector) {
        norm_rect_in_for_hand_detector.value() >> hand_detector.In("NORM_RECT");
      }
      auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
      auto& hand_association = graph.AddNode("HandAssociationCalculator");
      hand_association.GetOptions<HandAssociationCalculatorOptions>()
@@ -313,7 +328,9 @@ class HandLandmarkerGraph : public core::ModelTaskGraph {
      // series, and we don't want to enable the tracking and hand associations
      // between input images. Always use the hand detector graph.
      image_in >> hand_detector.In("IMAGE");
      norm_rect_in >> hand_detector.In("NORM_RECT");
      if (norm_rect_in) {
        norm_rect_in.value() >> hand_detector.In("NORM_RECT");
      }
      auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS");
      hand_rects_from_hand_detector >> clip_hand_rects.In("");
    }
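The hunks above make the NORM_RECT input genuinely optional: the stream is now carried as a `std::optional<Stream<NormalizedRect>>` and only wired into the detector when present. A minimal sketch of that pattern pulled out of the graph for clarity; the helper name is hypothetical, while `HasInput` comes from mediapipe/util/graph_builder_utils.h, which this change adds as a dependency:

```c++
#include <optional>

#include "mediapipe/framework/api2/builder.h"
#include "mediapipe/framework/formats/rect.pb.h"

namespace {
using ::mediapipe::NormalizedRect;
using ::mediapipe::api2::builder::GenericNode;
using ::mediapipe::api2::builder::Stream;

// Hypothetical helper: forwards the NORM_RECT stream to a node only when the
// optional input was actually declared by the calling graph.
void MaybeConnectNormRect(std::optional<Stream<NormalizedRect>> norm_rect,
                          GenericNode& node) {
  if (norm_rect) {
    norm_rect.value() >> node.In("NORM_RECT");
  }
}
}  // namespace
```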
@@ -34,16 +34,14 @@ objc_library(
    data = [
        "//mediapipe/tasks/testdata/vision:test_images",
    ],
    sdk_frameworks = [
        "CoreMedia",
        "CoreVideo",
        "CoreGraphics",
        "UIKit",
        "Accelerate",
    ],
    deps = [
        "//mediapipe/tasks/ios/common:MPPCommon",
        "//mediapipe/tasks/ios/vision/core:MPPImage",
        "//third_party/apple_frameworks:Accelerate",
        "//third_party/apple_frameworks:CoreGraphics",
        "//third_party/apple_frameworks:CoreMedia",
        "//third_party/apple_frameworks:CoreVideo",
        "//third_party/apple_frameworks:UIKit",
    ],
)

@@ -11,11 +11,6 @@ objc_library(
        "-std=c++17",
    ],
    module_name = "MPPImage",
    sdk_frameworks = [
        "CoreMedia",
        "CoreVideo",
        "UIKit",
    ],
    deps = [
        "//mediapipe/tasks/ios/common:MPPCommon",
        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
@@ -0,0 +1,27 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#import <Foundation/Foundation.h>

#include "mediapipe/framework/packet.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"

/**
 * This class helps create various kinds of packets for MediaPipe Vision Tasks.
 */
@interface MPPVisionPacketCreator : NSObject

+ (mediapipe::Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error;

@end
@@ -0,0 +1,43 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h"

#include "mediapipe/framework/formats/image.h"

namespace {
using ::mediapipe::Image;
using ::mediapipe::ImageFrame;
using ::mediapipe::MakePacket;
using ::mediapipe::Packet;
}  // namespace

struct freeDeleter {
  void operator()(void *ptr) { free(ptr); }
};

@implementation MPPVisionPacketCreator

+ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error {
  std::unique_ptr<ImageFrame> imageFrame = [image imageFrameWithError:error];

  if (!imageFrame) {
    return Packet();
  }

  return MakePacket<Image>(std::move(imageFrame));
}

@end
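For reference, a sketch of the C++ side of what `+createPacketWithMPPImage:error:` does once the image has been converted: the ImageFrame is wrapped in a mediapipe::Image and placed in a Packet, and an empty Packet signals an upstream conversion failure. This mirrors the implementation above; the free-function name is made up for illustration:

```c++
#include <memory>
#include <utility>

#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/packet.h"

// Hypothetical helper mirroring the Objective-C++ method above.
mediapipe::Packet MakeImagePacket(
    std::unique_ptr<mediapipe::ImageFrame> frame) {
  if (!frame) {
    return mediapipe::Packet();  // Conversion failed upstream.
  }
  return mediapipe::MakePacket<mediapipe::Image>(std::move(frame));
}
```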
@@ -4,23 +4,22 @@ licenses(["notice"])

objc_library(
    name = "MPPImageUtils",
    srcs = ["sources/MPPImage+Utils.m"],
    srcs = ["sources/MPPImage+Utils.mm"],
    hdrs = ["sources/MPPImage+Utils.h"],
    copts = [
        "-ObjC++",
        "-std=c++17",
    ],
    module_name = "MPPImageUtils",
    sdk_frameworks = [
        "Accelerate",
        "CoreGraphics",
        "CoreImage",
        "CoreVideo",
        "UIKit",
    ],
    deps = [
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/tasks/ios/common:MPPCommon",
        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
        "//mediapipe/tasks/ios/vision/core:MPPImage",
        "//third_party/apple_frameworks:UIKit",
        "//third_party/apple_frameworks:Accelerate",
        "//third_party/apple_frameworks:CoreGraphics",
        "//third_party/apple_frameworks:CoreImage",
        "//third_party/apple_frameworks:CoreVideo",
    ],
)
@@ -14,30 +14,27 @@

#import <Foundation/Foundation.h>

#include "mediapipe/framework/formats/image_frame.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"

NS_ASSUME_NONNULL_BEGIN

/**
 * Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library.
 * Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`.
 */
@interface MPPImage (Utils)

/** Bitmap size of the image. */
@property(nonatomic, readonly) CGSize bitmapSize;

/**
 * Returns the underlying uint8 pixel buffer of an `MPPImage`.
 * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel
 * data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated
 * in the R, G, B channels.
 * Converts the `MPPImage` into a `mediapipe::ImageFrame`.
 * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is
 * converted to an RGB format. In case of grayscale images, the mono channel is duplicated in the R,
 * G, B channels.
 *
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return The underlying pixel buffer of the `MPPImage` or nil in case of errors.
 * @return An std::unique_ptr<mediapipe::ImageFrame> or `nullptr` in case of errors.
 */
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error;
- (std::unique_ptr<mediapipe::ImageFrame>)imageFrameWithError:(NSError **)error;

@end

@@ -22,6 +22,12 @@
#import <CoreImage/CoreImage.h>
#import <CoreVideo/CoreVideo.h>

#include "mediapipe/framework/formats/image_format.pb.h"

namespace {
using ::mediapipe::ImageFrame;
}

@interface MPPPixelDataUtils : NSObject

+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
@@ -35,21 +41,20 @@

@interface MPPCVPixelBufferUtils : NSObject

+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error;
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
                                                     error:(NSError **)error;

@end

@interface MPPCGImageUtils : NSObject

+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error;

@end

@interface UIImage (RawPixelDataUtils)
@interface UIImage (ImageFrameUtils)

@property(nonatomic, readonly) CGSize bitmapSize;

- (uint8_t *)pixelDataWithError:(NSError **)error;
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error;

@end

@@ -120,9 +125,14 @@

@implementation MPPCVPixelBufferUtils

+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error {
+ (std::unique_ptr<ImageFrame>)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
                                                        error:(NSError **)error {
  CVPixelBufferLockBaseAddress(pixelBuffer, 0);

  size_t width = CVPixelBufferGetWidth(pixelBuffer);
  size_t height = CVPixelBufferGetHeight(pixelBuffer);
  size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer);

  uint8_t *rgbPixelData = [MPPPixelDataUtils
      rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
                      withWidth:CVPixelBufferGetWidth(pixelBuffer)
@@ -133,19 +143,24 @@

  CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);

  return rgbPixelData;
  if (!rgbPixelData) {
    return nullptr;
  }

  std::unique_ptr<ImageFrame> imageFrame = absl::make_unique<ImageFrame>(
      ::mediapipe::ImageFormat::SRGB, width, height, stride, static_cast<uint8 *>(rgbPixelData),
      /*deleter=*/free);

  return imageFrame;
}

+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
                                           error:(NSError **)error {
  uint8_t *pixelData = NULL;

+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
                                                     error:(NSError **)error {
  OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);

  switch (pixelBufferFormat) {
    case kCVPixelFormatType_32BGRA: {
      pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error];
      break;
      return [MPPCVPixelBufferUtils rgbImageFrameFromCVPixelBuffer:pixelBuffer error:error];
    }
    default: {
      [MPPCommonUtils createCustomError:error
@@ -155,20 +170,20 @@
    }
  }

  return pixelData;
  return nullptr;
}

@end

@implementation MPPCGImageUtils

+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
  size_t width = CGImageGetWidth(cgImage);
  size_t height = CGImageGetHeight(cgImage);

  NSInteger bitsPerComponent = 8;
  NSInteger channelCount = 4;
  UInt8 *pixel_data_to_return = NULL;
  UInt8 *pixelDataToReturn = NULL;

  CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
  size_t bytesPerRow = channelCount * width;
@@ -191,12 +206,12 @@
    if (srcData) {
      // We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input
      // a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
      pixel_data_to_return = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
                                                                withWidth:width
                                                                   height:height
                                                                   stride:bytesPerRow
                                                        pixelBufferFormat:kCVPixelFormatType_32RGBA
                                                                    error:error];
      pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
                                                             withWidth:width
                                                                height:height
                                                                stride:bytesPerRow
                                                     pixelBufferFormat:kCVPixelFormatType_32RGBA
                                                                 error:error];
    }

    CGContextRelease(context);
@@ -204,38 +219,38 @@

  CGColorSpaceRelease(colorSpace);

  return pixel_data_to_return;
  std::unique_ptr<ImageFrame> imageFrame =
      absl::make_unique<ImageFrame>(mediapipe::ImageFormat::SRGB, (int)width, (int)height,
                                    (int)bytesPerRow, static_cast<uint8 *>(pixelDataToReturn),
                                    /*deleter=*/free);

  return imageFrame;
}

@end

@implementation UIImage (RawPixelDataUtils)

- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error {
  uint8_t *pixelData = NULL;
@implementation UIImage (ImageFrameUtils)

- (std::unique_ptr<ImageFrame>)imageFrameFromCIImageWithError:(NSError **)error {
  if (self.CIImage.pixelBuffer) {
    pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer
                                                            error:error];
    return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error];

  } else if (self.CIImage.CGImage) {
    pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error];
    return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error];
  } else {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:@"CIImage should have CGImage or CVPixelBuffer info."];
  }

  return pixelData;
  return nullptr;
}

- (uint8_t *)pixelDataWithError:(NSError **)error {
  uint8_t *pixelData = nil;

- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
  if (self.CGImage) {
    pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error];
    return [MPPCGImageUtils imageFrameFromCGImage:self.CGImage error:error];
  } else if (self.CIImage) {
    pixelData = [self pixelDataFromCIImageWithError:error];
    return [self imageFrameFromCIImageWithError:error];
  } else {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
@@ -243,46 +258,24 @@
                                       " CIImage or CGImage."];
  }

  return pixelData;
  return nullptr;
}

- (CGSize)bitmapSize {
  CGFloat width = 0;
  CGFloat height = 0;

  if (self.CGImage) {
    width = CGImageGetWidth(self.CGImage);
    height = CGImageGetHeight(self.CGImage);
  } else if (self.CIImage.pixelBuffer) {
    width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer);
    height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer);
  } else if (self.CIImage.CGImage) {
    width = CGImageGetWidth(self.CIImage.CGImage);
    height = CGImageGetHeight(self.CIImage.CGImage);
  }
  return CGSizeMake(width, height);
}
@end

@implementation MPPImage (Utils)

- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error {
  uint8_t *pixelData = NULL;

- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
  switch (self.imageSourceType) {
    case MPPImageSourceTypeSampleBuffer: {
      CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
      pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:sampleImagePixelBuffer
                                                              error:error];
      break;
      return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error];
    }
    case MPPImageSourceTypePixelBuffer: {
      pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error];
      break;
      return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error];
    }
    case MPPImageSourceTypeImage: {
      pixelData = [self.image pixelDataWithError:error];
      break;
      return [self.image imageFrameWithError:error];
    }
    default:
      [MPPCommonUtils createCustomError:error
@@ -290,35 +283,7 @@
                            description:@"Invalid source type for MPPImage."];
  }

  return pixelData;
}

- (CGSize)bitmapSize {
  CGFloat width = 0;
  CGFloat height = 0;

  switch (self.imageSourceType) {
    case MPPImageSourceTypeSampleBuffer: {
      CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
      width = CVPixelBufferGetWidth(pixelBuffer);
      height = CVPixelBufferGetHeight(pixelBuffer);
      break;
    }
    case MPPImageSourceTypePixelBuffer: {
      width = CVPixelBufferGetWidth(self.pixelBuffer);
      height = CVPixelBufferGetHeight(self.pixelBuffer);
      break;
    }
    case MPPImageSourceTypeImage: {
      width = self.image.bitmapSize.width;
      height = self.image.bitmapSize.height;
      break;
    }
    default:
      break;
  }

  return CGSizeMake(width, height);
  return nullptr;
}

@end

mediapipe/tasks/ios/vision/image_classifier/BUILD (new file, 38 lines)
@@ -0,0 +1,38 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

package(default_visibility = ["//mediapipe/tasks:internal"])

licenses(["notice"])

objc_library(
    name = "MPPImageClassifierResult",
    srcs = ["sources/MPPImageClassifierResult.m"],
    hdrs = ["sources/MPPImageClassifierResult.h"],
    deps = [
        "//mediapipe/tasks/ios/components/containers:MPPClassificationResult",
        "//mediapipe/tasks/ios/core:MPPTaskResult",
    ],
)

objc_library(
    name = "MPPImageClassifierOptions",
    srcs = ["sources/MPPImageClassifierOptions.m"],
    hdrs = ["sources/MPPImageClassifierOptions.h"],
    deps = [
        ":MPPImageClassifierResult",
        "//mediapipe/tasks/ios/core:MPPTaskOptions",
        "//mediapipe/tasks/ios/vision/core:MPPRunningMode",
    ],
)
@@ -0,0 +1,71 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#import <Foundation/Foundation.h>

#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h"

NS_ASSUME_NONNULL_BEGIN

/**
 * Options for setting up a `MPPImageClassifier`.
 */
NS_SWIFT_NAME(ImageClassifierOptions)
@interface MPPImageClassifierOptions : MPPTaskOptions <NSCopying>

@property(nonatomic) MPPRunningMode runningMode;

/**
 * The user-defined result callback for processing live stream data. The result callback should only
 * be specified when the running mode is set to the live stream mode.
 */
@property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error);

/**
 * The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults
 * to English.
 */
@property(nonatomic, copy) NSString *displayNamesLocale;

/**
 * The maximum number of top-scored classification results to return. If < 0, all available results
 * will be returned. If 0, an invalid argument error is returned.
 */
@property(nonatomic) NSInteger maxResults;

/**
 * Score threshold to override the one provided in the model metadata (if any). Results below this
 * value are rejected.
 */
@property(nonatomic) float scoreThreshold;

/**
 * The allowlist of category names. If non-empty, detection results whose category name is not in
 * this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
 * exclusive with categoryDenylist.
 */
@property(nonatomic, copy) NSArray<NSString *> *categoryAllowlist;

/**
 * The denylist of category names. If non-empty, detection results whose category name is in this
 * set will be filtered out. Duplicate or unknown category names are ignored. Mutually exclusive
 * with categoryAllowlist.
 */
@property(nonatomic, copy) NSArray<NSString *> *categoryDenylist;

@end

NS_ASSUME_NONNULL_END
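The `maxResults` and `scoreThreshold` options documented above compose as a score filter plus a top-scored cut. An illustrative sketch of that behavior in plain C++ with a hypothetical ScoredCategory type, not the MediaPipe API; note that the real option treats maxResults == 0 as an invalid argument rather than as an empty result, which this sketch does not model:

```c++
#include <algorithm>
#include <string>
#include <vector>

// Hypothetical type for illustration only.
struct ScoredCategory {
  std::string name;
  float score;
};

std::vector<ScoredCategory> FilterResults(std::vector<ScoredCategory> results,
                                          float score_threshold,
                                          int max_results) {
  // Reject categories scoring below the threshold.
  results.erase(std::remove_if(results.begin(), results.end(),
                               [&](const ScoredCategory& c) {
                                 return c.score < score_threshold;
                               }),
                results.end());
  // Sort by descending score and keep at most max_results entries;
  // a negative max_results means "return everything".
  std::sort(results.begin(), results.end(),
            [](const ScoredCategory& a, const ScoredCategory& b) {
              return a.score > b.score;
            });
  if (max_results >= 0 &&
      results.size() > static_cast<size_t>(max_results)) {
    results.resize(max_results);
  }
  return results;
}
```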