Internal change

PiperOrigin-RevId: 529495239
MediaPipe Team 2023-05-04 13:15:01 -07:00 committed by jqtang
parent 64cad80543
commit 12b0b6fad1
11 changed files with 534 additions and 518 deletions

195
README.md
View File

@@ -4,8 +4,6 @@ title: Home
nav_order: 1
---
![MediaPipe](https://mediapipe.dev/images/mediapipe_small.png)
----
**Attention:** *Thanks for your interest in MediaPipe! We have moved to
@@ -14,86 +12,111 @@ as the primary developer documentation site for MediaPipe as of April 3, 2023.*
*This notice and web page will be removed on June 1, 2023.*
----
![MediaPipe](https://developers.google.com/static/mediapipe/images/home/hero_01_1920.png)
<br><br><br><br><br><br><br><br><br><br>
<br><br><br><br><br><br><br><br><br><br>
<br><br><br><br><br><br><br><br><br><br>
**Attention**: MediaPipe Solutions Preview is an early release. [Learn
more](https://developers.google.com/mediapipe/solutions/about#notice).
--------------------------------------------------------------------------------
**On-device machine learning for everyone**
## Live ML anywhere
Delight your customers with innovative machine learning features. MediaPipe
contains everything that you need to customize and deploy to mobile (Android,
iOS), web, desktop, edge devices, and IoT, effortlessly.
[MediaPipe](https://google.github.io/mediapipe/) offers cross-platform, customizable
ML solutions for live and streaming media.
* [See demos](https://goo.gle/mediapipe-studio)
* [Learn more](https://developers.google.com/mediapipe/solutions)
![accelerated.png](https://mediapipe.dev/images/accelerated_small.png) | ![cross_platform.png](https://mediapipe.dev/images/cross_platform_small.png)
:------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------:
***End-to-End acceleration***: *Built-in fast ML inference and processing accelerated even on common hardware* | ***Build once, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*
## Get started
----
You can get started with MediaPipe Solutions by checking out any of the
developer guides for
[vision](https://developers.google.com/mediapipe/solutions/vision/object_detector),
[text](https://developers.google.com/mediapipe/solutions/text/text_classifier),
and
[audio](https://developers.google.com/mediapipe/solutions/audio/audio_classifier)
tasks. If you need help setting up a development environment for use with
MediaPipe Tasks, check out the setup guides for
[Android](https://developers.google.com/mediapipe/solutions/setup_android), [web
apps](https://developers.google.com/mediapipe/solutions/setup_web), and
[Python](https://developers.google.com/mediapipe/solutions/setup_python).
## ML solutions in MediaPipe
## Solutions
Face Detection | Face Mesh | Iris | Hands | Pose | Holistic
:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :------:
[![face_detection](https://mediapipe.dev/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](https://mediapipe.dev/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](https://mediapipe.dev/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](https://mediapipe.dev/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](https://mediapipe.dev/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![holistic](https://mediapipe.dev/images/mobile/holistic_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/holistic)
MediaPipe Solutions provides a suite of libraries and tools for you to quickly
apply artificial intelligence (AI) and machine learning (ML) techniques in your
applications. You can plug these solutions into your applications immediately,
customize them to your needs, and use them across multiple development
platforms. MediaPipe Solutions is part of the MediaPipe [open source
project](https://github.com/google/mediapipe), so you can further customize the
solutions code to meet your application needs.
Hair Segmentation | Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT
:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---:
[![hair_segmentation](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](https://mediapipe.dev/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](https://mediapipe.dev/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](https://mediapipe.dev/images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](https://mediapipe.dev/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](https://mediapipe.dev/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift)
These libraries and resources provide the core functionality for each MediaPipe
Solution:
<!-- []() in the first cell is needed to preserve table formatting in GitHub Pages. -->
<!-- Whenever this table is updated, paste a copy to solutions/solutions.md. -->
* **MediaPipe Tasks**: Cross-platform APIs and libraries for deploying
solutions. [Learn
more](https://developers.google.com/mediapipe/solutions/tasks).
* **MediaPipe models**: Pre-trained, ready-to-run models for use with each
solution.
[]() | [Android](https://google.github.io/mediapipe/getting_started/android) | [iOS](https://google.github.io/mediapipe/getting_started/ios) | [C++](https://google.github.io/mediapipe/getting_started/cpp) | [Python](https://google.github.io/mediapipe/getting_started/python) | [JS](https://google.github.io/mediapipe/getting_started/javascript) | [Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/README.md)
:---------------------------------------------------------------------------------------- | :-------------------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------------: | :-----------------------------------------------------------: | :--------------------------------------------------------------------:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Holistic](https://google.github.io/mediapipe/solutions/holistic) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Selfie Segmentation](https://google.github.io/mediapipe/solutions/selfie_segmentation) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | |
[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | |
[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | ✅ |
[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | |
[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
These tools let you customize and evaluate solutions:
See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
* **MediaPipe Model Maker**: Customize models for solutions with your data.
[Learn more](https://developers.google.com/mediapipe/solutions/model_maker).
* **MediaPipe Studio**: Visualize, evaluate, and benchmark solutions in your
browser. [Learn
more](https://developers.google.com/mediapipe/solutions/studio).
## Getting started
### Legacy solutions
To start using MediaPipe
[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few
lines of code, see example code and demos in
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and
[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript).
We have ended support for [these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy)
as of March 1, 2023. All other MediaPipe Legacy Solutions will be upgraded to
a new MediaPipe Solution. See the [Solutions guide](https://developers.google.com/mediapipe/solutions/guide#legacy)
for details. The [code repository](https://github.com/google/mediapipe/tree/master/mediapipe)
and prebuilt binaries for all MediaPipe Legacy Solutions will continue to be
provided on an as-is basis.
To use MediaPipe in C++, Android and iOS, which allow further customization of
the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as
building your own, learn how to
[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and
start building example applications in
[C++](https://google.github.io/mediapipe/getting_started/cpp),
[Android](https://google.github.io/mediapipe/getting_started/android) and
[iOS](https://google.github.io/mediapipe/getting_started/ios).
For more on the legacy solutions, see the [documentation](https://github.com/google/mediapipe/tree/master/docs/solutions).
The source code is hosted in the
[MediaPipe GitHub repository](https://github.com/google/mediapipe), and you can
run code search using
[Google Open Source Code Search](https://cs.opensource.google/mediapipe/mediapipe).
## Framework
## Publications
To start using MediaPipe Framework, [install MediaPipe
Framework](https://developers.google.com/mediapipe/framework/getting_started/install)
and start building example applications in C++, Android, and iOS.
[MediaPipe Framework](https://developers.google.com/mediapipe/framework) is the
low-level component used to build efficient on-device machine learning
pipelines, similar to the premade MediaPipe Solutions.
Before using MediaPipe Framework, familiarize yourself with the following key
[Framework
concepts](https://developers.google.com/mediapipe/framework/framework_concepts/overview.md):
* [Packets](https://developers.google.com/mediapipe/framework/framework_concepts/packets.md)
* [Graphs](https://developers.google.com/mediapipe/framework/framework_concepts/graphs.md)
* [Calculators](https://developers.google.com/mediapipe/framework/framework_concepts/calculators.md)
## Community
* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe
users.
* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
community discussion around MediaPipe.
* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
curated list of awesome MediaPipe related frameworks, libraries and
software.
## Contributing
We welcome contributions. Please follow these
[guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md).
We use GitHub issues for tracking requests and bugs. Please post questions to
Stack Overflow with the `mediapipe` tag.
## Resources
### Publications
* [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html)
in Google Developers Blog
@@ -102,7 +125,8 @@ run code search using
* [SignAll SDK: Sign language interface using MediaPipe is now available for
developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html)
in Google Developers Blog
* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on
Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html)
in Google AI Blog
* [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
in Google AI Blog
@@ -130,43 +154,6 @@ run code search using
in Google AI Blog
* [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172)
## Videos
### Videos
* [YouTube Channel](https://www.youtube.com/c/MediaPipe)
## Events
* [MediaPipe Seattle Meetup, Google Building Waterside, 13 Feb 2020](https://mediapipe.page.link/seattle2020)
* [AI Nextcon 2020, 12-16 Feb 2020, Seattle](http://aisea20.xnextcon.com/)
* [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/)
* [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038)
* [ML Conference, Berlin, 11 Dec 2019](https://mlconference.ai/machine-learning-advanced-development/mediapipe-building-real-time-cross-platform-mobile-web-edge-desktop-video-audio-ml-pipelines/)
* [MediaPipe Berlin Meetup, Google Berlin, 11 Dec 2019](https://www.meetup.com/Berlin-AI-Tech-Talk/events/266328794/)
* [The 3rd Workshop on YouTube-8M Large Scale Video Understanding Workshop,
Seoul, Korea ICCV
2019](https://research.google.com/youtube8m/workshop2019/index.html)
* [AI DevWorld 2019, 10 Oct 2019, San Jose, CA](https://aidevworld.com)
* [Google Industry Workshop at ICIP 2019, 24 Sept 2019, Taipei, Taiwan](http://2019.ieeeicip.org/?action=page4&id=14#Google)
([presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5))
* [Open sourced at CVPR 2019, 17~20 June, Long Beach, CA](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe)
## Community
* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
curated list of awesome MediaPipe related frameworks, libraries and software
* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
community discussion around MediaPipe
## Alpha disclaimer
MediaPipe is currently in alpha at v0.7. We may still be making breaking API
changes and expect to get to stable APIs by v1.0.
## Contributing
We welcome contributions. Please follow these
[guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md).
We use GitHub issues for tracking requests and bugs. Please post questions to
Stack Overflow with the `mediapipe` tag.

View File

@@ -106,7 +106,7 @@ class Dataset(classification_dataset.ClassificationDataset):
...
Each <file0>.xml annotation file should have the following format:
<annotation>
<filename>file0.jpg<filename>
<filename>file0.jpg</filename>
<object>
<name>kangaroo</name>
<bndbox>
@@ -114,6 +114,7 @@ class Dataset(classification_dataset.ClassificationDataset):
<ymin>89</ymin>
<xmax>386</xmax>
<ymax>262</ymax>
</bndbox>
</object>
<object>...</object>
</annotation>
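The corrected annotation above is plain PASCAL VOC XML; the added </filename> and </bndbox> closing tags are what let a standard XML parser accept it. As a hedged illustration (not part of this change, and assuming a DOM environment with DOMParser), a minimal TypeScript sketch that extracts the object names and boxes from such a file:

interface VocObject {
  name: string;
  xmin: number;
  ymin: number;
  xmax: number;
  ymax: number;
}

// Parse a PASCAL VOC-style annotation string into name + bounding-box records.
function parseVocAnnotation(xml: string): VocObject[] {
  const doc = new DOMParser().parseFromString(xml, 'text/xml');
  return Array.from(doc.getElementsByTagName('object')).map(obj => {
    const text = (tag: string) => obj.getElementsByTagName(tag)[0]?.textContent ?? '';
    return {
      name: text('name'),
      xmin: Number(text('xmin')),
      ymin: Number(text('ymin')),
      xmax: Number(text('xmax')),
      ymax: Number(text('ymax')),
    };
  });
}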

View File

@@ -27,8 +27,6 @@ class HParams(hp.BaseHParams):
learning_rate: Learning rate to use for gradient descent training.
batch_size: Batch size for training.
epochs: Number of training iterations over the dataset.
do_fine_tuning: If true, the base module is trained together with the
classification layer on top.
cosine_decay_epochs: The number of epochs for cosine decay learning rate.
See
https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/CosineDecay
@@ -39,13 +37,13 @@
"""
# Parameters from BaseHParams class.
learning_rate: float = 0.003
batch_size: int = 32
epochs: int = 10
learning_rate: float = 0.3
batch_size: int = 8
epochs: int = 30
# Parameters for cosine learning rate decay
cosine_decay_epochs: Optional[int] = None
cosine_decay_alpha: float = 0.0
cosine_decay_alpha: float = 1.0
@dataclasses.dataclass
@@ -67,8 +65,8 @@ class QATHParams:
for more information.
"""
learning_rate: float = 0.03
batch_size: int = 32
epochs: int = 10
decay_steps: int = 231
learning_rate: float = 0.3
batch_size: int = 8
epochs: int = 15
decay_steps: int = 8
decay_rate: float = 0.96
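For reference, the cosine decay schedule these fields configure follows the tf.keras.optimizers.schedules.CosineDecay formula linked in the docstring. A hedged TypeScript sketch of that formula (illustrative only, not code from this change):

// Cosine decay: interpolate from the initial rate down to alpha * initial rate
// over decaySteps, following 0.5 * (1 + cos(pi * progress)).
function cosineDecayLearningRate(
    initialLearningRate: number, step: number, decaySteps: number,
    alpha: number): number {
  const progress = Math.min(step, decaySteps) / decaySteps;
  const cosine = 0.5 * (1 + Math.cos(Math.PI * progress));
  return initialLearningRate * ((1 - alpha) * cosine + alpha);
}

With the new default cosine_decay_alpha = 1.0 the expression reduces to the initial learning rate, so decay is effectively disabled unless alpha is lowered.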

View File

@@ -361,9 +361,10 @@ class FaceStylizerGraph : public core::ModelTaskGraph {
auto& tensors_to_image =
graph.AddNode("mediapipe.tasks.TensorsToImageCalculator");
ConfigureTensorsToImageCalculator(
image_to_tensor_options,
&tensors_to_image.GetOptions<TensorsToImageCalculatorOptions>());
auto& tensors_to_image_options =
tensors_to_image.GetOptions<TensorsToImageCalculatorOptions>();
tensors_to_image_options.mutable_input_tensor_float_range()->set_min(-1);
tensors_to_image_options.mutable_input_tensor_float_range()->set_max(1);
face_alignment_image >> tensors_to_image.In(kTensorsTag);
face_alignment = tensors_to_image.Out(kImageTag).Cast<Image>();
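The replacement wiring tells TensorsToImageCalculator that the face-alignment tensor holds floats in [-1, 1]. A hedged sketch of the scaling that range implies when such a value becomes an 8-bit pixel (the calculator's exact clamping and rounding may differ; TypeScript is used here only for illustration):

// Map a float in [min, max] (here [-1, 1]) to an 8-bit channel value.
function floatToUint8Pixel(value: number, min = -1, max = 1): number {
  const clamped = Math.min(Math.max(value, min), max);
  return Math.round(((clamped - min) / (max - min)) * 255);
}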

View File

@@ -41,7 +41,11 @@ mediapipe_ts_library(
mediapipe_ts_library(
name = "image",
srcs = ["image.ts"],
srcs = [
"image.ts",
"image_converter.ts",
"image_shader_context.ts",
],
)
mediapipe_ts_library(

View File

@@ -16,7 +16,8 @@
import 'jasmine';
import {MPImage, MPImageShaderContext, MPImageType} from './image';
import {MPImage, MPImageType} from './image';
import {MPImageShaderContext} from './image_shader_context';
const WIDTH = 2;
const HEIGHT = 2;

View File

@@ -14,6 +14,9 @@
* limitations under the License.
*/
import {DefaultColorConverter} from '../../../../tasks/web/vision/core/image_converter';
import {assertNotNull, MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context';
/** The underlying type of the image. */
export enum MPImageType {
/** Represents the native `UInt8ClampedArray` type. */
@@ -34,235 +37,6 @@ export enum MPImageType {
export type MPImageContainer =
Uint8ClampedArray|Float32Array|ImageData|ImageBitmap|WebGLTexture;
const VERTEX_SHADER = `
attribute vec2 aVertex;
attribute vec2 aTex;
varying vec2 vTex;
void main(void) {
gl_Position = vec4(aVertex, 0.0, 1.0);
vTex = aTex;
}`;
const FRAGMENT_SHADER = `
precision mediump float;
varying vec2 vTex;
uniform sampler2D inputTexture;
void main() {
gl_FragColor = texture2D(inputTexture, vTex);
}
`;
function assertNotNull<T>(value: T|null, msg: string): T {
if (value === null) {
throw new Error(`Unable to obtain required WebGL resource: ${msg}`);
}
return value;
}
// TODO: Move internal-only types to different module.
/**
* Utility class that encapsulates the buffers used by `MPImageShaderContext`.
* For internal use only.
*/
class MPImageShaderBuffers {
constructor(
private readonly gl: WebGL2RenderingContext,
private readonly vertexArrayObject: WebGLVertexArrayObject,
private readonly vertexBuffer: WebGLBuffer,
private readonly textureBuffer: WebGLBuffer) {}
bind() {
this.gl.bindVertexArray(this.vertexArrayObject);
}
unbind() {
this.gl.bindVertexArray(null);
}
close() {
this.gl.deleteVertexArray(this.vertexArrayObject);
this.gl.deleteBuffer(this.vertexBuffer);
this.gl.deleteBuffer(this.textureBuffer);
}
}
/**
* A class that encapsulates the shaders used by an MPImage. Can be re-used
* across MPImages that use the same WebGL2 rendering context.
*
* For internal use only.
*/
export class MPImageShaderContext {
private gl?: WebGL2RenderingContext;
private framebuffer?: WebGLFramebuffer;
private program?: WebGLProgram;
private vertexShader?: WebGLShader;
private fragmentShader?: WebGLShader;
private aVertex?: GLint;
private aTex?: GLint;
/**
* The shader buffers used for passthrough renders that don't modify the
* input texture.
*/
private shaderBuffersPassthrough?: MPImageShaderBuffers;
/**
* The shader buffers used for passthrough renders that flip the input texture
* vertically before conversion to a different type. This is used to flip the
* texture to the expected orientation for drawing in the browser.
*/
private shaderBuffersFlipVertically?: MPImageShaderBuffers;
private compileShader(source: string, type: number): WebGLShader {
const gl = this.gl!;
const shader =
assertNotNull(gl.createShader(type), 'Failed to create WebGL shader');
gl.shaderSource(shader, source);
gl.compileShader(shader);
if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
const info = gl.getShaderInfoLog(shader);
throw new Error(`Could not compile WebGL shader: ${info}`);
}
gl.attachShader(this.program!, shader);
return shader;
}
private setupShaders(): void {
const gl = this.gl!;
this.program =
assertNotNull(gl.createProgram()!, 'Failed to create WebGL program');
this.vertexShader = this.compileShader(VERTEX_SHADER, gl.VERTEX_SHADER);
this.fragmentShader =
this.compileShader(FRAGMENT_SHADER, gl.FRAGMENT_SHADER);
gl.linkProgram(this.program);
const linked = gl.getProgramParameter(this.program, gl.LINK_STATUS);
if (!linked) {
const info = gl.getProgramInfoLog(this.program);
throw new Error(`Error during program linking: ${info}`);
}
this.aVertex = gl.getAttribLocation(this.program, 'aVertex');
this.aTex = gl.getAttribLocation(this.program, 'aTex');
}
private createBuffers(flipVertically: boolean): MPImageShaderBuffers {
const gl = this.gl!;
const vertexArrayObject =
assertNotNull(gl.createVertexArray(), 'Failed to create vertex array');
gl.bindVertexArray(vertexArrayObject);
const vertexBuffer =
assertNotNull(gl.createBuffer(), 'Failed to create buffer');
gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);
gl.enableVertexAttribArray(this.aVertex!);
gl.vertexAttribPointer(this.aVertex!, 2, gl.FLOAT, false, 0, 0);
gl.bufferData(
gl.ARRAY_BUFFER, new Float32Array([-1, -1, -1, 1, 1, 1, 1, -1]),
gl.STATIC_DRAW);
const textureBuffer =
assertNotNull(gl.createBuffer(), 'Failed to create buffer');
gl.bindBuffer(gl.ARRAY_BUFFER, textureBuffer);
gl.enableVertexAttribArray(this.aTex!);
gl.vertexAttribPointer(this.aTex!, 2, gl.FLOAT, false, 0, 0);
const bufferData =
flipVertically ? [0, 1, 0, 0, 1, 0, 1, 1] : [0, 0, 0, 1, 1, 1, 1, 0];
gl.bufferData(
gl.ARRAY_BUFFER, new Float32Array(bufferData), gl.STATIC_DRAW);
gl.bindBuffer(gl.ARRAY_BUFFER, null);
gl.bindVertexArray(null);
return new MPImageShaderBuffers(
gl, vertexArrayObject, vertexBuffer, textureBuffer);
}
private getShaderBuffers(flipVertically: boolean): MPImageShaderBuffers {
if (flipVertically) {
if (!this.shaderBuffersFlipVertically) {
this.shaderBuffersFlipVertically =
this.createBuffers(/* flipVertically= */ true);
}
return this.shaderBuffersFlipVertically;
} else {
if (!this.shaderBuffersPassthrough) {
this.shaderBuffersPassthrough =
this.createBuffers(/* flipVertically= */ false);
}
return this.shaderBuffersPassthrough;
}
}
private maybeInitGL(gl: WebGL2RenderingContext): void {
if (!this.gl) {
this.gl = gl;
} else if (gl !== this.gl) {
throw new Error('Cannot change GL context once initialized');
}
}
/** Runs the callback using the shader. */
run<T>(
gl: WebGL2RenderingContext, flipVertically: boolean,
callback: () => T): T {
this.maybeInitGL(gl);
if (!this.program) {
this.setupShaders();
}
const shaderBuffers = this.getShaderBuffers(flipVertically);
gl.useProgram(this.program!);
shaderBuffers.bind();
const result = callback();
shaderBuffers.unbind();
return result;
}
/**
* Binds a framebuffer to the canvas. If the framebuffer does not yet exist,
* creates it first. Binds the provided texture to the framebuffer.
*/
bindFramebuffer(gl: WebGL2RenderingContext, texture: WebGLTexture): void {
this.maybeInitGL(gl);
if (!this.framebuffer) {
this.framebuffer =
assertNotNull(gl.createFramebuffer(), 'Failed to create framebuffer.');
}
gl.bindFramebuffer(gl.FRAMEBUFFER, this.framebuffer);
gl.framebufferTexture2D(
gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, texture, 0);
}
unbindFramebuffer(): void {
this.gl?.bindFramebuffer(this.gl.FRAMEBUFFER, null);
}
close() {
if (this.program) {
const gl = this.gl!;
gl.deleteProgram(this.program);
gl.deleteShader(this.vertexShader!);
gl.deleteShader(this.fragmentShader!);
}
if (this.framebuffer) {
this.gl!.deleteFramebuffer(this.framebuffer);
}
if (this.shaderBuffersPassthrough) {
this.shaderBuffersPassthrough.close();
}
if (this.shaderBuffersFlipVertically) {
this.shaderBuffersFlipVertically.close();
}
}
}
/** A four-channel color with red, green, blue, and alpha values. */
export type RGBAColor = [number, number, number, number];
@@ -329,70 +103,6 @@ export interface MPImageChannelConverter {
*/
uint8ToFloatConverter?: (value: number) => number;
}
/**
* Color converter that falls back to a default implementation if the
* user-provided converter does not specify a conversion.
*/
class DefaultColorConverter implements Required<MPImageChannelConverter> {
private static readonly WARNINGS_LOGGED = new Set<string>();
constructor(private readonly customConverter: MPImageChannelConverter) {}
floatToRGBAConverter(v: number): RGBAColor {
if (this.customConverter.floatToRGBAConverter) {
return this.customConverter.floatToRGBAConverter(v);
}
this.logWarningOnce('floatToRGBAConverter');
return [v * 255, v * 255, v * 255, 255];
}
uint8ToRGBAConverter(v: number): RGBAColor {
if (this.customConverter.uint8ToRGBAConverter) {
return this.customConverter.uint8ToRGBAConverter(v);
}
this.logWarningOnce('uint8ToRGBAConverter');
return [v, v, v, 255];
}
rgbaToFloatConverter(r: number, g: number, b: number, a: number): number {
if (this.customConverter.rgbaToFloatConverter) {
return this.customConverter.rgbaToFloatConverter(r, g, b, a);
}
this.logWarningOnce('rgbaToFloatConverter');
return (r / 3 + g / 3 + b / 3) / 255;
}
rgbaToUint8Converter(r: number, g: number, b: number, a: number): number {
if (this.customConverter.rgbaToUint8Converter) {
return this.customConverter.rgbaToUint8Converter(r, g, b, a);
}
this.logWarningOnce('rgbaToUint8Converter');
return r / 3 + g / 3 + b / 3;
}
floatToUint8Converter(v: number): number {
if (this.customConverter.floatToUint8Converter) {
return this.customConverter.floatToUint8Converter(v);
}
this.logWarningOnce('floatToUint8Converter');
return v * 255;
}
uint8ToFloatConverter(v: number): number {
if (this.customConverter.uint8ToFloatConverter) {
return this.customConverter.uint8ToFloatConverter(v);
}
this.logWarningOnce('uint8ToFloatConverter');
return v / 255;
}
private logWarningOnce(methodName: string): void {
if (!DefaultColorConverter.WARNINGS_LOGGED.has(methodName)) {
console.log(`Using default ${methodName}`);
DefaultColorConverter.WARNINGS_LOGGED.add(methodName);
}
}
}
/**
* The wrapper class for MediaPipe Image objects.

View File

@@ -0,0 +1,83 @@
/**
* Copyright 2023 The MediaPipe Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {MPImageChannelConverter, RGBAColor} from '../../../../tasks/web/vision/core/image';
/**
* Color converter that falls back to a default implementation if the
* user-provided converter does not specify a conversion.
*/
export class DefaultColorConverter implements
Required<MPImageChannelConverter> {
private static readonly WARNINGS_LOGGED = new Set<string>();
constructor(private readonly customConverter: MPImageChannelConverter) {}
floatToRGBAConverter(v: number): RGBAColor {
if (this.customConverter.floatToRGBAConverter) {
return this.customConverter.floatToRGBAConverter(v);
}
this.logWarningOnce('floatToRGBAConverter');
return [v * 255, v * 255, v * 255, 255];
}
uint8ToRGBAConverter(v: number): RGBAColor {
if (this.customConverter.uint8ToRGBAConverter) {
return this.customConverter.uint8ToRGBAConverter(v);
}
this.logWarningOnce('uint8ToRGBAConverter');
return [v, v, v, 255];
}
rgbaToFloatConverter(r: number, g: number, b: number, a: number): number {
if (this.customConverter.rgbaToFloatConverter) {
return this.customConverter.rgbaToFloatConverter(r, g, b, a);
}
this.logWarningOnce('rgbaToFloatConverter');
return (r / 3 + g / 3 + b / 3) / 255;
}
rgbaToUint8Converter(r: number, g: number, b: number, a: number): number {
if (this.customConverter.rgbaToUint8Converter) {
return this.customConverter.rgbaToUint8Converter(r, g, b, a);
}
this.logWarningOnce('rgbaToUint8Converter');
return r / 3 + g / 3 + b / 3;
}
floatToUint8Converter(v: number): number {
if (this.customConverter.floatToUint8Converter) {
return this.customConverter.floatToUint8Converter(v);
}
this.logWarningOnce('floatToUint8Converter');
return v * 255;
}
uint8ToFloatConverter(v: number): number {
if (this.customConverter.uint8ToFloatConverter) {
return this.customConverter.uint8ToFloatConverter(v);
}
this.logWarningOnce('uint8ToFloatConverter');
return v / 255;
}
private logWarningOnce(methodName: string): void {
if (!DefaultColorConverter.WARNINGS_LOGGED.has(methodName)) {
console.log(`Using default ${methodName}`);
DefaultColorConverter.WARNINGS_LOGGED.add(methodName);
}
}
}
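A hedged usage sketch for the class above (illustrative only; assumes DefaultColorConverter and RGBAColor are imported as elsewhere in this change): the caller supplies just the conversions it cares about, and everything else falls back to the defaults with a one-time console note.

const converter = new DefaultColorConverter({
  // Custom conversion: render float values on the blue channel.
  floatToRGBAConverter: (v: number): RGBAColor => [0, 0, v * 255, 255],
});

converter.floatToRGBAConverter(0.5);   // custom path: [0, 0, 127.5, 255]
converter.uint8ToRGBAConverter(128);   // default path: [128, 128, 128, 255]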

View File

@@ -0,0 +1,243 @@
/**
* Copyright 2023 The MediaPipe Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
const VERTEX_SHADER = `
attribute vec2 aVertex;
attribute vec2 aTex;
varying vec2 vTex;
void main(void) {
gl_Position = vec4(aVertex, 0.0, 1.0);
vTex = aTex;
}`;
const FRAGMENT_SHADER = `
precision mediump float;
varying vec2 vTex;
uniform sampler2D inputTexture;
void main() {
gl_FragColor = texture2D(inputTexture, vTex);
}
`;
/** Helper to assert that `value` is not null. */
export function assertNotNull<T>(value: T|null, msg: string): T {
if (value === null) {
throw new Error(`Unable to obtain required WebGL resource: ${msg}`);
}
return value;
}
/**
* Utility class that encapsulates the buffers used by `MPImageShaderContext`.
* For internal use only.
*/
class MPImageShaderBuffers {
constructor(
private readonly gl: WebGL2RenderingContext,
private readonly vertexArrayObject: WebGLVertexArrayObject,
private readonly vertexBuffer: WebGLBuffer,
private readonly textureBuffer: WebGLBuffer) {}
bind() {
this.gl.bindVertexArray(this.vertexArrayObject);
}
unbind() {
this.gl.bindVertexArray(null);
}
close() {
this.gl.deleteVertexArray(this.vertexArrayObject);
this.gl.deleteBuffer(this.vertexBuffer);
this.gl.deleteBuffer(this.textureBuffer);
}
}
/**
* A class that encapsulates the shaders used by an MPImage. Can be re-used
* across MPImages that use the same WebGL2 rendering context.
*
* For internal use only.
*/
export class MPImageShaderContext {
private gl?: WebGL2RenderingContext;
private framebuffer?: WebGLFramebuffer;
private program?: WebGLProgram;
private vertexShader?: WebGLShader;
private fragmentShader?: WebGLShader;
private aVertex?: GLint;
private aTex?: GLint;
/**
* The shader buffers used for passthrough renders that don't modify the
* input texture.
*/
private shaderBuffersPassthrough?: MPImageShaderBuffers;
/**
* The shader buffers used for passthrough renders that flip the input texture
* vertically before conversion to a different type. This is used to flip the
* texture to the expected orientation for drawing in the browser.
*/
private shaderBuffersFlipVertically?: MPImageShaderBuffers;
private compileShader(source: string, type: number): WebGLShader {
const gl = this.gl!;
const shader =
assertNotNull(gl.createShader(type), 'Failed to create WebGL shader');
gl.shaderSource(shader, source);
gl.compileShader(shader);
if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
const info = gl.getShaderInfoLog(shader);
throw new Error(`Could not compile WebGL shader: ${info}`);
}
gl.attachShader(this.program!, shader);
return shader;
}
private setupShaders(): void {
const gl = this.gl!;
this.program =
assertNotNull(gl.createProgram()!, 'Failed to create WebGL program');
this.vertexShader = this.compileShader(VERTEX_SHADER, gl.VERTEX_SHADER);
this.fragmentShader =
this.compileShader(FRAGMENT_SHADER, gl.FRAGMENT_SHADER);
gl.linkProgram(this.program);
const linked = gl.getProgramParameter(this.program, gl.LINK_STATUS);
if (!linked) {
const info = gl.getProgramInfoLog(this.program);
throw new Error(`Error during program linking: ${info}`);
}
this.aVertex = gl.getAttribLocation(this.program, 'aVertex');
this.aTex = gl.getAttribLocation(this.program, 'aTex');
}
private createBuffers(flipVertically: boolean): MPImageShaderBuffers {
const gl = this.gl!;
const vertexArrayObject =
assertNotNull(gl.createVertexArray(), 'Failed to create vertex array');
gl.bindVertexArray(vertexArrayObject);
const vertexBuffer =
assertNotNull(gl.createBuffer(), 'Failed to create buffer');
gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);
gl.enableVertexAttribArray(this.aVertex!);
gl.vertexAttribPointer(this.aVertex!, 2, gl.FLOAT, false, 0, 0);
gl.bufferData(
gl.ARRAY_BUFFER, new Float32Array([-1, -1, -1, 1, 1, 1, 1, -1]),
gl.STATIC_DRAW);
const textureBuffer =
assertNotNull(gl.createBuffer(), 'Failed to create buffer');
gl.bindBuffer(gl.ARRAY_BUFFER, textureBuffer);
gl.enableVertexAttribArray(this.aTex!);
gl.vertexAttribPointer(this.aTex!, 2, gl.FLOAT, false, 0, 0);
const bufferData =
flipVertically ? [0, 1, 0, 0, 1, 0, 1, 1] : [0, 0, 0, 1, 1, 1, 1, 0];
gl.bufferData(
gl.ARRAY_BUFFER, new Float32Array(bufferData), gl.STATIC_DRAW);
gl.bindBuffer(gl.ARRAY_BUFFER, null);
gl.bindVertexArray(null);
return new MPImageShaderBuffers(
gl, vertexArrayObject, vertexBuffer, textureBuffer);
}
private getShaderBuffers(flipVertically: boolean): MPImageShaderBuffers {
if (flipVertically) {
if (!this.shaderBuffersFlipVertically) {
this.shaderBuffersFlipVertically =
this.createBuffers(/* flipVertically= */ true);
}
return this.shaderBuffersFlipVertically;
} else {
if (!this.shaderBuffersPassthrough) {
this.shaderBuffersPassthrough =
this.createBuffers(/* flipVertically= */ false);
}
return this.shaderBuffersPassthrough;
}
}
private maybeInitGL(gl: WebGL2RenderingContext): void {
if (!this.gl) {
this.gl = gl;
} else if (gl !== this.gl) {
throw new Error('Cannot change GL context once initialized');
}
}
/** Runs the callback using the shader. */
run<T>(
gl: WebGL2RenderingContext, flipVertically: boolean,
callback: () => T): T {
this.maybeInitGL(gl);
if (!this.program) {
this.setupShaders();
}
const shaderBuffers = this.getShaderBuffers(flipVertically);
gl.useProgram(this.program!);
shaderBuffers.bind();
const result = callback();
shaderBuffers.unbind();
return result;
}
/**
* Binds a framebuffer to the canvas. If the framebuffer does not yet exist,
* creates it first. Binds the provided texture to the framebuffer.
*/
bindFramebuffer(gl: WebGL2RenderingContext, texture: WebGLTexture): void {
this.maybeInitGL(gl);
if (!this.framebuffer) {
this.framebuffer =
assertNotNull(gl.createFramebuffer(), 'Failed to create framebuffer.');
}
gl.bindFramebuffer(gl.FRAMEBUFFER, this.framebuffer);
gl.framebufferTexture2D(
gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, texture, 0);
}
unbindFramebuffer(): void {
this.gl?.bindFramebuffer(this.gl.FRAMEBUFFER, null);
}
close() {
if (this.program) {
const gl = this.gl!;
gl.deleteProgram(this.program);
gl.deleteShader(this.vertexShader!);
gl.deleteShader(this.fragmentShader!);
}
if (this.framebuffer) {
this.gl!.deleteFramebuffer(this.framebuffer);
}
if (this.shaderBuffersPassthrough) {
this.shaderBuffersPassthrough.close();
}
if (this.shaderBuffersFlipVertically) {
this.shaderBuffersFlipVertically.close();
}
}
}
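MPImageShaderContext is marked for internal use, but a hedged sketch of its framebuffer helpers shows the intended call pattern (illustrative only; the function name and sizes are assumptions, and it presumes a valid WebGL2 context and texture):

// Attach `texture` to the context's framebuffer and read its RGBA bytes back.
function readTexturePixels(
    shaderContext: MPImageShaderContext, gl: WebGL2RenderingContext,
    texture: WebGLTexture, width: number, height: number): Uint8Array {
  const pixels = new Uint8Array(width * height * 4);
  shaderContext.bindFramebuffer(gl, texture);
  gl.readPixels(0, 0, width, height, gl.RGBA, gl.UNSIGNED_BYTE, pixels);
  shaderContext.unbindFramebuffer();
  return pixels;
}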

View File

@@ -17,8 +17,9 @@
import {NormalizedRect} from '../../../../framework/formats/rect_pb';
import {TaskRunner} from '../../../../tasks/web/core/task_runner';
import {WasmFileset} from '../../../../tasks/web/core/wasm_fileset';
import {MPImage, MPImageShaderContext} from '../../../../tasks/web/vision/core/image';
import {MPImage} from '../../../../tasks/web/vision/core/image';
import {ImageProcessingOptions} from '../../../../tasks/web/vision/core/image_processing_options';
import {MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context';
import {GraphRunner, ImageSource, WasmMediaPipeConstructor} from '../../../../web/graph_runner/graph_runner';
import {SupportImage, WasmImage} from '../../../../web/graph_runner/graph_runner_image_lib';
import {isWebKit} from '../../../../web/graph_runner/platform_utils';