From 12b0b6fad104c5a5cb19dbd68e9a708ddaf6cfd7 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 4 May 2023 13:15:01 -0700 Subject: [PATCH] Internal change PiperOrigin-RevId: 529495239 --- README.md | 195 ++++++------ docs/index.md | 195 ++++++------ .../python/vision/object_detector/dataset.py | 3 +- .../vision/object_detector/hyperparameters.py | 18 +- .../face_stylizer/face_stylizer_graph.cc | 7 +- mediapipe/tasks/web/vision/core/BUILD | 6 +- mediapipe/tasks/web/vision/core/image.test.ts | 3 +- mediapipe/tasks/web/vision/core/image.ts | 296 +----------------- .../tasks/web/vision/core/image_converter.ts | 83 +++++ .../web/vision/core/image_shader_context.ts | 243 ++++++++++++++ .../web/vision/core/vision_task_runner.ts | 3 +- 11 files changed, 534 insertions(+), 518 deletions(-) create mode 100644 mediapipe/tasks/web/vision/core/image_converter.ts create mode 100644 mediapipe/tasks/web/vision/core/image_shader_context.ts diff --git a/README.md b/README.md index a82c88ab1..cb3d56de6 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,6 @@ title: Home nav_order: 1 --- -![MediaPipe](https://mediapipe.dev/images/mediapipe_small.png) - ---- **Attention:** *Thanks for your interest in MediaPipe! We have moved to @@ -14,86 +12,111 @@ as the primary developer documentation site for MediaPipe as of April 3, 2023.* *This notice and web page will be removed on June 1, 2023.* ----- +![MediaPipe](https://developers.google.com/static/mediapipe/images/home/hero_01_1920.png) -
-<br><br><br><br><br><br><br><br><br><br>
-<br><br><br><br><br><br><br><br><br><br>
-<br><br><br><br><br><br><br><br><br><br>
+**Attention**: MediaPipe Solutions Preview is an early release. [Learn
+more](https://developers.google.com/mediapipe/solutions/about#notice).

--------------------------------------------------------------------------------

-## Live ML anywhere
+**On-device machine learning for everyone**

-[MediaPipe](https://google.github.io/mediapipe/) offers cross-platform, customizable
-ML solutions for live and streaming media.
+Delight your customers with innovative machine learning features. MediaPipe
+contains everything that you need to customize and deploy to mobile (Android,
+iOS), web, desktop, edge devices, and IoT, effortlessly.

+* [See demos](https://goo.gle/mediapipe-studio)
+* [Learn more](https://developers.google.com/mediapipe/solutions)

-![accelerated.png](https://mediapipe.dev/images/accelerated_small.png) | ![cross_platform.png](https://mediapipe.dev/images/cross_platform_small.png)
-:------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------:
-***End-to-End acceleration***: *Built-in fast ML inference and processing accelerated even on common hardware* | ***Build once, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
-![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
-***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*

----

+## Get started

+You can get started with MediaPipe Solutions by checking out any of the
+developer guides for
+[vision](https://developers.google.com/mediapipe/solutions/vision/object_detector),
+[text](https://developers.google.com/mediapipe/solutions/text/text_classifier),
+and
+[audio](https://developers.google.com/mediapipe/solutions/audio/audio_classifier)
+tasks. If you need help setting up a development environment for use with
+MediaPipe Tasks, check out the setup guides for
+[Android](https://developers.google.com/mediapipe/solutions/setup_android), [web
+apps](https://developers.google.com/mediapipe/solutions/setup_web), and
+[Python](https://developers.google.com/mediapipe/solutions/setup_python).
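+
+As a quick illustration, here is a minimal Python sketch for one of the vision
+tasks. This is a hedged example rather than official sample code: the model
+file name and image path are placeholders for assets you download yourself.
+
+```python
+import mediapipe as mp
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+
+# Build an object detector from a downloaded .tflite task model.
+base_options = python.BaseOptions(model_asset_path='efficientdet_lite0.tflite')
+options = vision.ObjectDetectorOptions(
+    base_options=base_options, score_threshold=0.5)
+detector = vision.ObjectDetector.create_from_options(options)
+
+# Run detection on one image and print the top category of each detection.
+image = mp.Image.create_from_file('image.jpg')
+result = detector.detect(image)
+for detection in result.detections:
+  print(detection.categories[0].category_name, detection.bounding_box)
+```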
-## ML solutions in MediaPipe +## Solutions -Face Detection | Face Mesh | Iris | Hands | Pose | Holistic -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :------: -[![face_detection](https://mediapipe.dev/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](https://mediapipe.dev/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](https://mediapipe.dev/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](https://mediapipe.dev/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](https://mediapipe.dev/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](https://mediapipe.dev/images/mobile/holistic_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/holistic) +MediaPipe Solutions provides a suite of libraries and tools for you to quickly +apply artificial intelligence (AI) and machine learning (ML) techniques in your +applications. You can plug these solutions into your applications immediately, +customize them to your needs, and use them across multiple development +platforms. MediaPipe Solutions is part of the MediaPipe [open source +project](https://github.com/google/mediapipe), so you can further customize the +solutions code to meet your application needs. 
-Hair Segmentation | Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT -:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![hair_segmentation](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](https://mediapipe.dev/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](https://mediapipe.dev/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](https://mediapipe.dev/images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](https://mediapipe.dev/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](https://mediapipe.dev/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +These libraries and resources provide the core functionality for each MediaPipe +Solution: - - +* **MediaPipe Tasks**: Cross-platform APIs and libraries for deploying + solutions. [Learn + more](https://developers.google.com/mediapipe/solutions/tasks). +* **MediaPipe models**: Pre-trained, ready-to-run models for use with each + solution. 
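+
+In practice, pairing a task API with a matching pre-trained model takes only a
+few lines of code. A minimal sketch for a text task follows; the model file
+name is a placeholder for a classifier downloaded from the model pages:
+
+```python
+from mediapipe.tasks.python import text
+
+# Pair the TextClassifier task API with a downloaded pre-trained model.
+classifier = text.TextClassifier.create_from_model_path('classifier.tflite')
+result = classifier.classify('This was the best movie I have seen all year!')
+print(result.classifications[0].categories[0].category_name)
+```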
-[]() | [Android](https://google.github.io/mediapipe/getting_started/android) | [iOS](https://google.github.io/mediapipe/getting_started/ios) | [C++](https://google.github.io/mediapipe/getting_started/cpp) | [Python](https://google.github.io/mediapipe/getting_started/python) | [JS](https://google.github.io/mediapipe/getting_started/javascript) | [Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/README.md) -:---------------------------------------------------------------------------------------- | :-------------------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------------: | :-----------------------------------------------------------: | :--------------------------------------------------------------------: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | | -[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Holistic](https://google.github.io/mediapipe/solutions/holistic) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Selfie Segmentation](https://google.github.io/mediapipe/solutions/selfie_segmentation) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | -[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | ✅ | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | +These tools let you customize and evaluate solutions: -See also -[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models) -for ML models released in MediaPipe. +* **MediaPipe Model Maker**: Customize models for solutions with your data. + [Learn more](https://developers.google.com/mediapipe/solutions/model_maker). +* **MediaPipe Studio**: Visualize, evaluate, and benchmark solutions in your + browser. [Learn + more](https://developers.google.com/mediapipe/solutions/studio). -## Getting started +### Legacy solutions -To start using MediaPipe -[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few -lines code, see example code and demos in -[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and -[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript). +We have ended support for [these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy) +as of March 1, 2023. All other MediaPipe Legacy Solutions will be upgraded to +a new MediaPipe Solution. 
See the [Solutions guide](https://developers.google.com/mediapipe/solutions/guide#legacy) +for details. The [code repository](https://github.com/google/mediapipe/tree/master/mediapipe) +and prebuilt binaries for all MediaPipe Legacy Solutions will continue to be +provided on an as-is basis. -To use MediaPipe in C++, Android and iOS, which allow further customization of -the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as -building your own, learn how to -[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and -start building example applications in -[C++](https://google.github.io/mediapipe/getting_started/cpp), -[Android](https://google.github.io/mediapipe/getting_started/android) and -[iOS](https://google.github.io/mediapipe/getting_started/ios). +For more on the legacy solutions, see the [documentation](https://github.com/google/mediapipe/tree/master/docs/solutions). -The source code is hosted in the -[MediaPipe Github repository](https://github.com/google/mediapipe), and you can -run code search using -[Google Open Source Code Search](https://cs.opensource.google/mediapipe/mediapipe). +## Framework -## Publications +To start using MediaPipe Framework, [install MediaPipe +Framework](https://developers.google.com/mediapipe/framework/getting_started/install) +and start building example applications in C++, Android, and iOS. + +[MediaPipe Framework](https://developers.google.com/mediapipe/framework) is the +low-level component used to build efficient on-device machine learning +pipelines, similar to the premade MediaPipe Solutions. + +Before using MediaPipe Framework, familiarize yourself with the following key +[Framework +concepts](https://developers.google.com/mediapipe/framework/framework_concepts/overview.md): + +* [Packets](https://developers.google.com/mediapipe/framework/framework_concepts/packets.md) +* [Graphs](https://developers.google.com/mediapipe/framework/framework_concepts/graphs.md) +* [Calculators](https://developers.google.com/mediapipe/framework/framework_concepts/calculators.md) + +## Community + +* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe + users. +* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General + community discussion around MediaPipe. +* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A + curated list of awesome MediaPipe related frameworks, libraries and + software. + +## Contributing + +We welcome contributions. Please follow these +[guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md). + +We use GitHub issues for tracking requests and bugs. Please post questions to +the MediaPipe Stack Overflow with a `mediapipe` tag. 
+ +## Resources + +### Publications * [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html) in Google Developers Blog @@ -102,7 +125,8 @@ run code search using * [SignAll SDK: Sign language interface using MediaPipe is now available for developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html) in Google Developers Blog -* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html) +* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on + Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html) in Google AI Blog * [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html) in Google AI Blog @@ -130,43 +154,6 @@ run code search using in Google AI Blog * [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172) -## Videos +### Videos * [YouTube Channel](https://www.youtube.com/c/MediaPipe) - -## Events - -* [MediaPipe Seattle Meetup, Google Building Waterside, 13 Feb 2020](https://mediapipe.page.link/seattle2020) -* [AI Nextcon 2020, 12-16 Feb 2020, Seattle](http://aisea20.xnextcon.com/) -* [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/) -* [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038) -* [ML Conference, Berlin, 11 Dec 2019](https://mlconference.ai/machine-learning-advanced-development/mediapipe-building-real-time-cross-platform-mobile-web-edge-desktop-video-audio-ml-pipelines/) -* [MediaPipe Berlin Meetup, Google Berlin, 11 Dec 2019](https://www.meetup.com/Berlin-AI-Tech-Talk/events/266328794/) -* [The 3rd Workshop on YouTube-8M Large Scale Video Understanding Workshop, - Seoul, Korea ICCV - 2019](https://research.google.com/youtube8m/workshop2019/index.html) -* [AI DevWorld 2019, 10 Oct 2019, San Jose, CA](https://aidevworld.com) -* [Google Industry Workshop at ICIP 2019, 24 Sept 2019, Taipei, Taiwan](http://2019.ieeeicip.org/?action=page4&id=14#Google) - ([presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5)) -* [Open sourced at CVPR 2019, 17~20 June, Long Beach, CA](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) - -## Community - -* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A - curated list of awesome MediaPipe related frameworks, libraries and software -* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users -* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General - community discussion around MediaPipe - -## Alpha disclaimer - -MediaPipe is currently in alpha at v0.7. We may be still making breaking API -changes and expect to get to stable APIs by v1.0. - -## Contributing - -We welcome contributions. Please follow these -[guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md). - -We use GitHub issues for tracking requests and bugs. Please post questions to -the MediaPipe Stack Overflow with a `mediapipe` tag. 
diff --git a/docs/index.md b/docs/index.md index a82c88ab1..cb3d56de6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,8 +4,6 @@ title: Home nav_order: 1 --- -![MediaPipe](https://mediapipe.dev/images/mediapipe_small.png) - ---- **Attention:** *Thanks for your interest in MediaPipe! We have moved to @@ -14,86 +12,111 @@ as the primary developer documentation site for MediaPipe as of April 3, 2023.* *This notice and web page will be removed on June 1, 2023.* ----- +![MediaPipe](https://developers.google.com/static/mediapipe/images/home/hero_01_1920.png) -
-<br><br><br><br><br><br><br><br><br><br>
-<br><br><br><br><br><br><br><br><br><br>
-<br><br><br><br><br><br><br><br><br><br>
+**Attention**: MediaPipe Solutions Preview is an early release. [Learn
+more](https://developers.google.com/mediapipe/solutions/about#notice).

--------------------------------------------------------------------------------

-## Live ML anywhere
+**On-device machine learning for everyone**

-[MediaPipe](https://google.github.io/mediapipe/) offers cross-platform, customizable
-ML solutions for live and streaming media.
+Delight your customers with innovative machine learning features. MediaPipe
+contains everything that you need to customize and deploy to mobile (Android,
+iOS), web, desktop, edge devices, and IoT, effortlessly.

+* [See demos](https://goo.gle/mediapipe-studio)
+* [Learn more](https://developers.google.com/mediapipe/solutions)

-![accelerated.png](https://mediapipe.dev/images/accelerated_small.png) | ![cross_platform.png](https://mediapipe.dev/images/cross_platform_small.png)
-:------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------:
-***End-to-End acceleration***: *Built-in fast ML inference and processing accelerated even on common hardware* | ***Build once, deploy anywhere***: *Unified solution works across Android, iOS, desktop/cloud, web and IoT*
-![ready_to_use.png](https://mediapipe.dev/images/ready_to_use_small.png) | ![open_source.png](https://mediapipe.dev/images/open_source_small.png)
-***Ready-to-use solutions***: *Cutting-edge ML solutions demonstrating full power of the framework* | ***Free and open source***: *Framework and solutions both under Apache 2.0, fully extensible and customizable*

----

+## Get started

+You can get started with MediaPipe Solutions by checking out any of the
+developer guides for
+[vision](https://developers.google.com/mediapipe/solutions/vision/object_detector),
+[text](https://developers.google.com/mediapipe/solutions/text/text_classifier),
+and
+[audio](https://developers.google.com/mediapipe/solutions/audio/audio_classifier)
+tasks. If you need help setting up a development environment for use with
+MediaPipe Tasks, check out the setup guides for
+[Android](https://developers.google.com/mediapipe/solutions/setup_android), [web
+apps](https://developers.google.com/mediapipe/solutions/setup_web), and
+[Python](https://developers.google.com/mediapipe/solutions/setup_python).
-## ML solutions in MediaPipe +## Solutions -Face Detection | Face Mesh | Iris | Hands | Pose | Holistic -:----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: | :------: -[![face_detection](https://mediapipe.dev/images/mobile/face_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_detection) | [![face_mesh](https://mediapipe.dev/images/mobile/face_mesh_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/face_mesh) | [![iris](https://mediapipe.dev/images/mobile/iris_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/iris) | [![hand](https://mediapipe.dev/images/mobile/hand_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hands) | [![pose](https://mediapipe.dev/images/mobile/pose_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/pose) | [![hair_segmentation](https://mediapipe.dev/images/mobile/holistic_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/holistic) +MediaPipe Solutions provides a suite of libraries and tools for you to quickly +apply artificial intelligence (AI) and machine learning (ML) techniques in your +applications. You can plug these solutions into your applications immediately, +customize them to your needs, and use them across multiple development +platforms. MediaPipe Solutions is part of the MediaPipe [open source +project](https://github.com/google/mediapipe), so you can further customize the +solutions code to meet your application needs. 
-Hair Segmentation | Object Detection | Box Tracking | Instant Motion Tracking | Objectron | KNIFT -:-------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: | :---: -[![hair_segmentation](https://mediapipe.dev/images/mobile/hair_segmentation_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/hair_segmentation) | [![object_detection](https://mediapipe.dev/images/mobile/object_detection_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/object_detection) | [![box_tracking](https://mediapipe.dev/images/mobile/object_tracking_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/box_tracking) | [![instant_motion_tracking](https://mediapipe.dev/images/mobile/instant_motion_tracking_android_small.gif)](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | [![objectron](https://mediapipe.dev/images/mobile/objectron_chair_android_gpu_small.gif)](https://google.github.io/mediapipe/solutions/objectron) | [![knift](https://mediapipe.dev/images/mobile/template_matching_android_cpu_small.gif)](https://google.github.io/mediapipe/solutions/knift) +These libraries and resources provide the core functionality for each MediaPipe +Solution: - - +* **MediaPipe Tasks**: Cross-platform APIs and libraries for deploying + solutions. [Learn + more](https://developers.google.com/mediapipe/solutions/tasks). +* **MediaPipe models**: Pre-trained, ready-to-run models for use with each + solution. 
-[]() | [Android](https://google.github.io/mediapipe/getting_started/android) | [iOS](https://google.github.io/mediapipe/getting_started/ios) | [C++](https://google.github.io/mediapipe/getting_started/cpp) | [Python](https://google.github.io/mediapipe/getting_started/python) | [JS](https://google.github.io/mediapipe/getting_started/javascript) | [Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/README.md) -:---------------------------------------------------------------------------------------- | :-------------------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------------------: | :-----------------------------------------------------------: | :--------------------------------------------------------------------: -[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ -[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | | -[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Holistic](https://google.github.io/mediapipe/solutions/holistic) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Selfie Segmentation](https://google.github.io/mediapipe/solutions/selfie_segmentation) | ✅ | ✅ | ✅ | ✅ | ✅ | -[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | | -[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅ -[Box Tracking](https://google.github.io/mediapipe/solutions/box_tracking) | ✅ | ✅ | ✅ | | | -[Instant Motion Tracking](https://google.github.io/mediapipe/solutions/instant_motion_tracking) | ✅ | | | | | -[Objectron](https://google.github.io/mediapipe/solutions/objectron) | ✅ | | ✅ | ✅ | ✅ | -[KNIFT](https://google.github.io/mediapipe/solutions/knift) | ✅ | | | | | -[AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | | -[MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | | -[YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | | +These tools let you customize and evaluate solutions: -See also -[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models) -for ML models released in MediaPipe. +* **MediaPipe Model Maker**: Customize models for solutions with your data. + [Learn more](https://developers.google.com/mediapipe/solutions/model_maker). +* **MediaPipe Studio**: Visualize, evaluate, and benchmark solutions in your + browser. [Learn + more](https://developers.google.com/mediapipe/solutions/studio). -## Getting started +### Legacy solutions -To start using MediaPipe -[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few -lines code, see example code and demos in -[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and -[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript). +We have ended support for [these MediaPipe Legacy Solutions](https://developers.google.com/mediapipe/solutions/guide#legacy) +as of March 1, 2023. All other MediaPipe Legacy Solutions will be upgraded to +a new MediaPipe Solution. 
See the [Solutions guide](https://developers.google.com/mediapipe/solutions/guide#legacy) +for details. The [code repository](https://github.com/google/mediapipe/tree/master/mediapipe) +and prebuilt binaries for all MediaPipe Legacy Solutions will continue to be +provided on an as-is basis. -To use MediaPipe in C++, Android and iOS, which allow further customization of -the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as -building your own, learn how to -[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and -start building example applications in -[C++](https://google.github.io/mediapipe/getting_started/cpp), -[Android](https://google.github.io/mediapipe/getting_started/android) and -[iOS](https://google.github.io/mediapipe/getting_started/ios). +For more on the legacy solutions, see the [documentation](https://github.com/google/mediapipe/tree/master/docs/solutions). -The source code is hosted in the -[MediaPipe Github repository](https://github.com/google/mediapipe), and you can -run code search using -[Google Open Source Code Search](https://cs.opensource.google/mediapipe/mediapipe). +## Framework -## Publications +To start using MediaPipe Framework, [install MediaPipe +Framework](https://developers.google.com/mediapipe/framework/getting_started/install) +and start building example applications in C++, Android, and iOS. + +[MediaPipe Framework](https://developers.google.com/mediapipe/framework) is the +low-level component used to build efficient on-device machine learning +pipelines, similar to the premade MediaPipe Solutions. + +Before using MediaPipe Framework, familiarize yourself with the following key +[Framework +concepts](https://developers.google.com/mediapipe/framework/framework_concepts/overview.md): + +* [Packets](https://developers.google.com/mediapipe/framework/framework_concepts/packets.md) +* [Graphs](https://developers.google.com/mediapipe/framework/framework_concepts/graphs.md) +* [Calculators](https://developers.google.com/mediapipe/framework/framework_concepts/calculators.md) + +## Community + +* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe + users. +* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General + community discussion around MediaPipe. +* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A + curated list of awesome MediaPipe related frameworks, libraries and + software. + +## Contributing + +We welcome contributions. Please follow these +[guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md). + +We use GitHub issues for tracking requests and bugs. Please post questions to +the MediaPipe Stack Overflow with a `mediapipe` tag. 
+ +## Resources + +### Publications * [Bringing artworks to life with AR](https://developers.googleblog.com/2021/07/bringing-artworks-to-life-with-ar.html) in Google Developers Blog @@ -102,7 +125,8 @@ run code search using * [SignAll SDK: Sign language interface using MediaPipe is now available for developers](https://developers.googleblog.com/2021/04/signall-sdk-sign-language-interface-using-mediapipe-now-available.html) in Google Developers Blog -* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html) +* [MediaPipe Holistic - Simultaneous Face, Hand and Pose Prediction, on + Device](https://ai.googleblog.com/2020/12/mediapipe-holistic-simultaneous-face.html) in Google AI Blog * [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html) in Google AI Blog @@ -130,43 +154,6 @@ run code search using in Google AI Blog * [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172) -## Videos +### Videos * [YouTube Channel](https://www.youtube.com/c/MediaPipe) - -## Events - -* [MediaPipe Seattle Meetup, Google Building Waterside, 13 Feb 2020](https://mediapipe.page.link/seattle2020) -* [AI Nextcon 2020, 12-16 Feb 2020, Seattle](http://aisea20.xnextcon.com/) -* [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/) -* [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038) -* [ML Conference, Berlin, 11 Dec 2019](https://mlconference.ai/machine-learning-advanced-development/mediapipe-building-real-time-cross-platform-mobile-web-edge-desktop-video-audio-ml-pipelines/) -* [MediaPipe Berlin Meetup, Google Berlin, 11 Dec 2019](https://www.meetup.com/Berlin-AI-Tech-Talk/events/266328794/) -* [The 3rd Workshop on YouTube-8M Large Scale Video Understanding Workshop, - Seoul, Korea ICCV - 2019](https://research.google.com/youtube8m/workshop2019/index.html) -* [AI DevWorld 2019, 10 Oct 2019, San Jose, CA](https://aidevworld.com) -* [Google Industry Workshop at ICIP 2019, 24 Sept 2019, Taipei, Taiwan](http://2019.ieeeicip.org/?action=page4&id=14#Google) - ([presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5)) -* [Open sourced at CVPR 2019, 17~20 June, Long Beach, CA](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) - -## Community - -* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A - curated list of awesome MediaPipe related frameworks, libraries and software -* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users -* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General - community discussion around MediaPipe - -## Alpha disclaimer - -MediaPipe is currently in alpha at v0.7. We may be still making breaking API -changes and expect to get to stable APIs by v1.0. - -## Contributing - -We welcome contributions. Please follow these -[guidelines](https://github.com/google/mediapipe/blob/master/CONTRIBUTING.md). - -We use GitHub issues for tracking requests and bugs. Please post questions to -the MediaPipe Stack Overflow with a `mediapipe` tag. 
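
For context on the Model Maker object-detector changes below: these
hyperparameters surface through the `mediapipe_model_maker` package, roughly as
sketched here. This is a hedged sketch; directory names are placeholders, and
the dataset layout is the PASCAL VOC format described in the `Dataset`
docstring that follows.

```python
from mediapipe_model_maker import object_detector

# Folders of images plus per-image .xml annotations (PASCAL VOC format).
train_data = object_detector.Dataset.from_pascal_voc_folder('data/train')
validation_data = object_detector.Dataset.from_pascal_voc_folder('data/val')

# HParams carries the defaults retuned in this change
# (learning_rate=0.3, batch_size=8, epochs=30); callers may override any field.
options = object_detector.ObjectDetectorOptions(
    supported_model=object_detector.SupportedModels.MOBILENET_V2,
    hparams=object_detector.HParams(export_dir='exported_model'))

model = object_detector.ObjectDetector.create(
    train_data=train_data, validation_data=validation_data, options=options)
```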
diff --git a/mediapipe/model_maker/python/vision/object_detector/dataset.py b/mediapipe/model_maker/python/vision/object_detector/dataset.py
index 6899d8612..c18a071b2 100644
--- a/mediapipe/model_maker/python/vision/object_detector/dataset.py
+++ b/mediapipe/model_maker/python/vision/object_detector/dataset.py
@@ -106,7 +106,7 @@ class Dataset(classification_dataset.ClassificationDataset):
         ...
       Each .xml annotation file should have the following format:
       <annotation>
-      <filename>file0.jpg</filename>
+        <filename>file0.jpg</filename>
         <object>
           <name>kangaroo</name>
           <bndbox>
@@ -114,6 +114,7 @@ class Dataset(classification_dataset.ClassificationDataset):
             <ymin>89</ymin>
             <xmax>386</xmax>
             <ymax>262</ymax>
           </bndbox>
         </object>
+        <object>...</object>
       </annotation>

diff --git a/mediapipe/model_maker/python/vision/object_detector/hyperparameters.py b/mediapipe/model_maker/python/vision/object_detector/hyperparameters.py
index 1bc7514f2..35fb630ae 100644
--- a/mediapipe/model_maker/python/vision/object_detector/hyperparameters.py
+++ b/mediapipe/model_maker/python/vision/object_detector/hyperparameters.py
@@ -27,8 +27,6 @@ class HParams(hp.BaseHParams):
     learning_rate: Learning rate to use for gradient descent training.
     batch_size: Batch size for training.
     epochs: Number of training iterations over the dataset.
-    do_fine_tuning: If true, the base module is trained together with the
-      classification layer on top.
     cosine_decay_epochs: The number of epochs for cosine decay learning rate.
       See
       https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/CosineDecay
@@ -39,13 +37,13 @@ class HParams(hp.BaseHParams):
   """

   # Parameters from BaseHParams class.
-  learning_rate: float = 0.003
-  batch_size: int = 32
-  epochs: int = 10
+  learning_rate: float = 0.3
+  batch_size: int = 8
+  epochs: int = 30

   # Parameters for cosine learning rate decay
   cosine_decay_epochs: Optional[int] = None
-  cosine_decay_alpha: float = 0.0
+  cosine_decay_alpha: float = 1.0


 @dataclasses.dataclass
@@ -67,8 +65,8 @@ class QATHParams:
     for more information.
""" - learning_rate: float = 0.03 - batch_size: int = 32 - epochs: int = 10 - decay_steps: int = 231 + learning_rate: float = 0.3 + batch_size: int = 8 + epochs: int = 15 + decay_steps: int = 8 decay_rate: float = 0.96 diff --git a/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer_graph.cc b/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer_graph.cc index cb49ef59d..d7265a146 100644 --- a/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer_graph.cc +++ b/mediapipe/tasks/cc/vision/face_stylizer/face_stylizer_graph.cc @@ -361,9 +361,10 @@ class FaceStylizerGraph : public core::ModelTaskGraph { auto& tensors_to_image = graph.AddNode("mediapipe.tasks.TensorsToImageCalculator"); - ConfigureTensorsToImageCalculator( - image_to_tensor_options, - &tensors_to_image.GetOptions()); + auto& tensors_to_image_options = + tensors_to_image.GetOptions(); + tensors_to_image_options.mutable_input_tensor_float_range()->set_min(-1); + tensors_to_image_options.mutable_input_tensor_float_range()->set_max(1); face_alignment_image >> tensors_to_image.In(kTensorsTag); face_alignment = tensors_to_image.Out(kImageTag).Cast(); diff --git a/mediapipe/tasks/web/vision/core/BUILD b/mediapipe/tasks/web/vision/core/BUILD index c53247ba7..05a5b3b83 100644 --- a/mediapipe/tasks/web/vision/core/BUILD +++ b/mediapipe/tasks/web/vision/core/BUILD @@ -41,7 +41,11 @@ mediapipe_ts_library( mediapipe_ts_library( name = "image", - srcs = ["image.ts"], + srcs = [ + "image.ts", + "image_converter.ts", + "image_shader_context.ts", + ], ) mediapipe_ts_library( diff --git a/mediapipe/tasks/web/vision/core/image.test.ts b/mediapipe/tasks/web/vision/core/image.test.ts index 73eb44240..da3bd76b2 100644 --- a/mediapipe/tasks/web/vision/core/image.test.ts +++ b/mediapipe/tasks/web/vision/core/image.test.ts @@ -16,7 +16,8 @@ import 'jasmine'; -import {MPImage, MPImageShaderContext, MPImageType} from './image'; +import {MPImage, MPImageType} from './image'; +import {MPImageShaderContext} from './image_shader_context'; const WIDTH = 2; const HEIGHT = 2; diff --git a/mediapipe/tasks/web/vision/core/image.ts b/mediapipe/tasks/web/vision/core/image.ts index 7d6997d37..e2b21c0e6 100644 --- a/mediapipe/tasks/web/vision/core/image.ts +++ b/mediapipe/tasks/web/vision/core/image.ts @@ -14,6 +14,9 @@ * limitations under the License. */ +import {DefaultColorConverter} from '../../../../tasks/web/vision/core/image_converter'; +import {assertNotNull, MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context'; + /** The underlying type of the image. */ export enum MPImageType { /** Represents the native `UInt8ClampedArray` type. */ @@ -34,235 +37,6 @@ export enum MPImageType { export type MPImageContainer = Uint8ClampedArray|Float32Array|ImageData|ImageBitmap|WebGLTexture; -const VERTEX_SHADER = ` - attribute vec2 aVertex; - attribute vec2 aTex; - varying vec2 vTex; - void main(void) { - gl_Position = vec4(aVertex, 0.0, 1.0); - vTex = aTex; - }`; - -const FRAGMENT_SHADER = ` - precision mediump float; - varying vec2 vTex; - uniform sampler2D inputTexture; - void main() { - gl_FragColor = texture2D(inputTexture, vTex); - } - `; - -function assertNotNull(value: T|null, msg: string): T { - if (value === null) { - throw new Error(`Unable to obtain required WebGL resource: ${msg}`); - } - return value; -} - -// TODO: Move internal-only types to different module. - -/** - * Utility class that encapsulates the buffers used by `MPImageShaderContext`. - * For internal use only. 
- */ -class MPImageShaderBuffers { - constructor( - private readonly gl: WebGL2RenderingContext, - private readonly vertexArrayObject: WebGLVertexArrayObject, - private readonly vertexBuffer: WebGLBuffer, - private readonly textureBuffer: WebGLBuffer) {} - - bind() { - this.gl.bindVertexArray(this.vertexArrayObject); - } - - unbind() { - this.gl.bindVertexArray(null); - } - - close() { - this.gl.deleteVertexArray(this.vertexArrayObject); - this.gl.deleteBuffer(this.vertexBuffer); - this.gl.deleteBuffer(this.textureBuffer); - } -} - -/** - * A class that encapsulates the shaders used by an MPImage. Can be re-used - * across MPImages that use the same WebGL2Rendering context. - * - * For internal use only. - */ -export class MPImageShaderContext { - private gl?: WebGL2RenderingContext; - private framebuffer?: WebGLFramebuffer; - private program?: WebGLProgram; - private vertexShader?: WebGLShader; - private fragmentShader?: WebGLShader; - private aVertex?: GLint; - private aTex?: GLint; - - /** - * The shader buffers used for passthrough renders that don't modify the - * input texture. - */ - private shaderBuffersPassthrough?: MPImageShaderBuffers; - - /** - * The shader buffers used for passthrough renders that flip the input texture - * vertically before conversion to a different type. This is used to flip the - * texture to the expected orientation for drawing in the browser. - */ - private shaderBuffersFlipVertically?: MPImageShaderBuffers; - - private compileShader(source: string, type: number): WebGLShader { - const gl = this.gl!; - const shader = - assertNotNull(gl.createShader(type), 'Failed to create WebGL shader'); - gl.shaderSource(shader, source); - gl.compileShader(shader); - if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) { - const info = gl.getShaderInfoLog(shader); - throw new Error(`Could not compile WebGL shader: ${info}`); - } - gl.attachShader(this.program!, shader); - return shader; - } - - private setupShaders(): void { - const gl = this.gl!; - this.program = - assertNotNull(gl.createProgram()!, 'Failed to create WebGL program'); - - this.vertexShader = this.compileShader(VERTEX_SHADER, gl.VERTEX_SHADER); - this.fragmentShader = - this.compileShader(FRAGMENT_SHADER, gl.FRAGMENT_SHADER); - - gl.linkProgram(this.program); - const linked = gl.getProgramParameter(this.program, gl.LINK_STATUS); - if (!linked) { - const info = gl.getProgramInfoLog(this.program); - throw new Error(`Error during program linking: ${info}`); - } - - this.aVertex = gl.getAttribLocation(this.program, 'aVertex'); - this.aTex = gl.getAttribLocation(this.program, 'aTex'); - } - - private createBuffers(flipVertically: boolean): MPImageShaderBuffers { - const gl = this.gl!; - const vertexArrayObject = - assertNotNull(gl.createVertexArray(), 'Failed to create vertex array'); - gl.bindVertexArray(vertexArrayObject); - - const vertexBuffer = - assertNotNull(gl.createBuffer(), 'Failed to create buffer'); - gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer); - gl.enableVertexAttribArray(this.aVertex!); - gl.vertexAttribPointer(this.aVertex!, 2, gl.FLOAT, false, 0, 0); - gl.bufferData( - gl.ARRAY_BUFFER, new Float32Array([-1, -1, -1, 1, 1, 1, 1, -1]), - gl.STATIC_DRAW); - - const textureBuffer = - assertNotNull(gl.createBuffer(), 'Failed to create buffer'); - gl.bindBuffer(gl.ARRAY_BUFFER, textureBuffer); - gl.enableVertexAttribArray(this.aTex!); - gl.vertexAttribPointer(this.aTex!, 2, gl.FLOAT, false, 0, 0); - - const bufferData = - flipVertically ? 
[0, 1, 0, 0, 1, 0, 1, 1] : [0, 0, 0, 1, 1, 1, 1, 0]; - gl.bufferData( - gl.ARRAY_BUFFER, new Float32Array(bufferData), gl.STATIC_DRAW); - - gl.bindBuffer(gl.ARRAY_BUFFER, null); - gl.bindVertexArray(null); - - return new MPImageShaderBuffers( - gl, vertexArrayObject, vertexBuffer, textureBuffer); - } - - private getShaderBuffers(flipVertically: boolean): MPImageShaderBuffers { - if (flipVertically) { - if (!this.shaderBuffersFlipVertically) { - this.shaderBuffersFlipVertically = - this.createBuffers(/* flipVertically= */ true); - } - return this.shaderBuffersFlipVertically; - } else { - if (!this.shaderBuffersPassthrough) { - this.shaderBuffersPassthrough = - this.createBuffers(/* flipVertically= */ false); - } - return this.shaderBuffersPassthrough; - } - } - - private maybeInitGL(gl: WebGL2RenderingContext): void { - if (!this.gl) { - this.gl = gl; - } else if (gl !== this.gl) { - throw new Error('Cannot change GL context once initialized'); - } - } - - /** Runs the callback using the shader. */ - run( - gl: WebGL2RenderingContext, flipVertically: boolean, - callback: () => T): T { - this.maybeInitGL(gl); - - if (!this.program) { - this.setupShaders(); - } - - const shaderBuffers = this.getShaderBuffers(flipVertically); - gl.useProgram(this.program!); - shaderBuffers.bind(); - const result = callback(); - shaderBuffers.unbind(); - - return result; - } - - /** - * Binds a framebuffer to the canvas. If the framebuffer does not yet exist, - * creates it first. Binds the provided texture to the framebuffer. - */ - bindFramebuffer(gl: WebGL2RenderingContext, texture: WebGLTexture): void { - this.maybeInitGL(gl); - if (!this.framebuffer) { - this.framebuffer = - assertNotNull(gl.createFramebuffer(), 'Failed to create framebuffe.'); - } - gl.bindFramebuffer(gl.FRAMEBUFFER, this.framebuffer); - gl.framebufferTexture2D( - gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, texture, 0); - } - - unbindFramebuffer(): void { - this.gl?.bindFramebuffer(this.gl.FRAMEBUFFER, null); - } - - close() { - if (this.program) { - const gl = this.gl!; - gl.deleteProgram(this.program); - gl.deleteShader(this.vertexShader!); - gl.deleteShader(this.fragmentShader!); - } - if (this.framebuffer) { - this.gl!.deleteFramebuffer(this.framebuffer); - } - if (this.shaderBuffersPassthrough) { - this.shaderBuffersPassthrough.close(); - } - if (this.shaderBuffersFlipVertically) { - this.shaderBuffersFlipVertically.close(); - } - } -} - /** A four channel color with a red, green, blue and alpha values. */ export type RGBAColor = [number, number, number, number]; @@ -329,70 +103,6 @@ export interface MPImageChannelConverter { */ uint8ToFloatConverter?: (value: number) => number; } -/** - * Color converter that falls back to a default implementation if the - * user-provided converter does not specify a conversion. 
- */ -class DefaultColorConverter implements Required { - private static readonly WARNINGS_LOGGED = new Set(); - - constructor(private readonly customConverter: MPImageChannelConverter) {} - - floatToRGBAConverter(v: number): RGBAColor { - if (this.customConverter.floatToRGBAConverter) { - return this.customConverter.floatToRGBAConverter(v); - } - this.logWarningOnce('floatToRGBAConverter'); - return [v * 255, v * 255, v * 255, 255]; - } - - uint8ToRGBAConverter(v: number): RGBAColor { - if (this.customConverter.uint8ToRGBAConverter) { - return this.customConverter.uint8ToRGBAConverter(v); - } - this.logWarningOnce('uint8ToRGBAConverter'); - return [v, v, v, 255]; - } - - rgbaToFloatConverter(r: number, g: number, b: number, a: number): number { - if (this.customConverter.rgbaToFloatConverter) { - return this.customConverter.rgbaToFloatConverter(r, g, b, a); - } - this.logWarningOnce('rgbaToFloatConverter'); - return (r / 3 + g / 3 + b / 3) / 255; - } - - rgbaToUint8Converter(r: number, g: number, b: number, a: number): number { - if (this.customConverter.rgbaToUint8Converter) { - return this.customConverter.rgbaToUint8Converter(r, g, b, a); - } - this.logWarningOnce('rgbaToUint8Converter'); - return r / 3 + g / 3 + b / 3; - } - - floatToUint8Converter(v: number): number { - if (this.customConverter.floatToUint8Converter) { - return this.customConverter.floatToUint8Converter(v); - } - this.logWarningOnce('floatToUint8Converter'); - return v * 255; - } - - uint8ToFloatConverter(v: number): number { - if (this.customConverter.uint8ToFloatConverter) { - return this.customConverter.uint8ToFloatConverter(v); - } - this.logWarningOnce('uint8ToFloatConverter'); - return v / 255; - } - - private logWarningOnce(methodName: string): void { - if (!DefaultColorConverter.WARNINGS_LOGGED.has(methodName)) { - console.log(`Using default ${methodName}`); - DefaultColorConverter.WARNINGS_LOGGED.add(methodName); - } - } -} /** * The wrapper class for MediaPipe Image objects. diff --git a/mediapipe/tasks/web/vision/core/image_converter.ts b/mediapipe/tasks/web/vision/core/image_converter.ts new file mode 100644 index 000000000..348b89b82 --- /dev/null +++ b/mediapipe/tasks/web/vision/core/image_converter.ts @@ -0,0 +1,83 @@ +/** + * Copyright 2023 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {MPImageChannelConverter, RGBAColor} from '../../../../tasks/web/vision/core/image'; + +/** + * Color converter that falls back to a default implementation if the + * user-provided converter does not specify a conversion. 
+ */
+export class DefaultColorConverter implements
+    Required<MPImageChannelConverter> {
+  private static readonly WARNINGS_LOGGED = new Set<string>();
+
+  constructor(private readonly customConverter: MPImageChannelConverter) {}
+
+  floatToRGBAConverter(v: number): RGBAColor {
+    if (this.customConverter.floatToRGBAConverter) {
+      return this.customConverter.floatToRGBAConverter(v);
+    }
+    this.logWarningOnce('floatToRGBAConverter');
+    return [v * 255, v * 255, v * 255, 255];
+  }
+
+  uint8ToRGBAConverter(v: number): RGBAColor {
+    if (this.customConverter.uint8ToRGBAConverter) {
+      return this.customConverter.uint8ToRGBAConverter(v);
+    }
+    this.logWarningOnce('uint8ToRGBAConverter');
+    return [v, v, v, 255];
+  }
+
+  rgbaToFloatConverter(r: number, g: number, b: number, a: number): number {
+    if (this.customConverter.rgbaToFloatConverter) {
+      return this.customConverter.rgbaToFloatConverter(r, g, b, a);
+    }
+    this.logWarningOnce('rgbaToFloatConverter');
+    return (r / 3 + g / 3 + b / 3) / 255;
+  }
+
+  rgbaToUint8Converter(r: number, g: number, b: number, a: number): number {
+    if (this.customConverter.rgbaToUint8Converter) {
+      return this.customConverter.rgbaToUint8Converter(r, g, b, a);
+    }
+    this.logWarningOnce('rgbaToUint8Converter');
+    return r / 3 + g / 3 + b / 3;
+  }
+
+  floatToUint8Converter(v: number): number {
+    if (this.customConverter.floatToUint8Converter) {
+      return this.customConverter.floatToUint8Converter(v);
+    }
+    this.logWarningOnce('floatToUint8Converter');
+    return v * 255;
+  }
+
+  uint8ToFloatConverter(v: number): number {
+    if (this.customConverter.uint8ToFloatConverter) {
+      return this.customConverter.uint8ToFloatConverter(v);
+    }
+    this.logWarningOnce('uint8ToFloatConverter');
+    return v / 255;
+  }
+
+  private logWarningOnce(methodName: string): void {
+    if (!DefaultColorConverter.WARNINGS_LOGGED.has(methodName)) {
+      console.log(`Using default ${methodName}`);
+      DefaultColorConverter.WARNINGS_LOGGED.add(methodName);
+    }
+  }
+}
diff --git a/mediapipe/tasks/web/vision/core/image_shader_context.ts b/mediapipe/tasks/web/vision/core/image_shader_context.ts
new file mode 100644
index 000000000..eb17d001a
--- /dev/null
+++ b/mediapipe/tasks/web/vision/core/image_shader_context.ts
@@ -0,0 +1,243 @@
+/**
+ * Copyright 2023 The MediaPipe Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const VERTEX_SHADER = `
+  attribute vec2 aVertex;
+  attribute vec2 aTex;
+  varying vec2 vTex;
+  void main(void) {
+    gl_Position = vec4(aVertex, 0.0, 1.0);
+    vTex = aTex;
+  }`;
+
+const FRAGMENT_SHADER = `
+  precision mediump float;
+  varying vec2 vTex;
+  uniform sampler2D inputTexture;
+  void main() {
+    gl_FragColor = texture2D(inputTexture, vTex);
+  }
+  `;
+
+/** Helper to assert that `value` is not null. */
+export function assertNotNull<T>(value: T|null, msg: string): T {
+  if (value === null) {
+    throw new Error(`Unable to obtain required WebGL resource: ${msg}`);
+  }
+  return value;
+}
+
+/**
+ * Utility class that encapsulates the buffers used by `MPImageShaderContext`.
+ * For internal use only.
+ */ +class MPImageShaderBuffers { + constructor( + private readonly gl: WebGL2RenderingContext, + private readonly vertexArrayObject: WebGLVertexArrayObject, + private readonly vertexBuffer: WebGLBuffer, + private readonly textureBuffer: WebGLBuffer) {} + + bind() { + this.gl.bindVertexArray(this.vertexArrayObject); + } + + unbind() { + this.gl.bindVertexArray(null); + } + + close() { + this.gl.deleteVertexArray(this.vertexArrayObject); + this.gl.deleteBuffer(this.vertexBuffer); + this.gl.deleteBuffer(this.textureBuffer); + } +} + +/** + * A class that encapsulates the shaders used by an MPImage. Can be re-used + * across MPImages that use the same WebGL2Rendering context. + * + * For internal use only. + */ +export class MPImageShaderContext { + private gl?: WebGL2RenderingContext; + private framebuffer?: WebGLFramebuffer; + private program?: WebGLProgram; + private vertexShader?: WebGLShader; + private fragmentShader?: WebGLShader; + private aVertex?: GLint; + private aTex?: GLint; + + /** + * The shader buffers used for passthrough renders that don't modify the + * input texture. + */ + private shaderBuffersPassthrough?: MPImageShaderBuffers; + + /** + * The shader buffers used for passthrough renders that flip the input texture + * vertically before conversion to a different type. This is used to flip the + * texture to the expected orientation for drawing in the browser. + */ + private shaderBuffersFlipVertically?: MPImageShaderBuffers; + + private compileShader(source: string, type: number): WebGLShader { + const gl = this.gl!; + const shader = + assertNotNull(gl.createShader(type), 'Failed to create WebGL shader'); + gl.shaderSource(shader, source); + gl.compileShader(shader); + if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) { + const info = gl.getShaderInfoLog(shader); + throw new Error(`Could not compile WebGL shader: ${info}`); + } + gl.attachShader(this.program!, shader); + return shader; + } + + private setupShaders(): void { + const gl = this.gl!; + this.program = + assertNotNull(gl.createProgram()!, 'Failed to create WebGL program'); + + this.vertexShader = this.compileShader(VERTEX_SHADER, gl.VERTEX_SHADER); + this.fragmentShader = + this.compileShader(FRAGMENT_SHADER, gl.FRAGMENT_SHADER); + + gl.linkProgram(this.program); + const linked = gl.getProgramParameter(this.program, gl.LINK_STATUS); + if (!linked) { + const info = gl.getProgramInfoLog(this.program); + throw new Error(`Error during program linking: ${info}`); + } + + this.aVertex = gl.getAttribLocation(this.program, 'aVertex'); + this.aTex = gl.getAttribLocation(this.program, 'aTex'); + } + + private createBuffers(flipVertically: boolean): MPImageShaderBuffers { + const gl = this.gl!; + const vertexArrayObject = + assertNotNull(gl.createVertexArray(), 'Failed to create vertex array'); + gl.bindVertexArray(vertexArrayObject); + + const vertexBuffer = + assertNotNull(gl.createBuffer(), 'Failed to create buffer'); + gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer); + gl.enableVertexAttribArray(this.aVertex!); + gl.vertexAttribPointer(this.aVertex!, 2, gl.FLOAT, false, 0, 0); + gl.bufferData( + gl.ARRAY_BUFFER, new Float32Array([-1, -1, -1, 1, 1, 1, 1, -1]), + gl.STATIC_DRAW); + + const textureBuffer = + assertNotNull(gl.createBuffer(), 'Failed to create buffer'); + gl.bindBuffer(gl.ARRAY_BUFFER, textureBuffer); + gl.enableVertexAttribArray(this.aTex!); + gl.vertexAttribPointer(this.aTex!, 2, gl.FLOAT, false, 0, 0); + + const bufferData = + flipVertically ? 
[0, 1, 0, 0, 1, 0, 1, 1] : [0, 0, 0, 1, 1, 1, 1, 0];
+    gl.bufferData(
+        gl.ARRAY_BUFFER, new Float32Array(bufferData), gl.STATIC_DRAW);
+
+    gl.bindBuffer(gl.ARRAY_BUFFER, null);
+    gl.bindVertexArray(null);
+
+    return new MPImageShaderBuffers(
+        gl, vertexArrayObject, vertexBuffer, textureBuffer);
+  }
+
+  private getShaderBuffers(flipVertically: boolean): MPImageShaderBuffers {
+    if (flipVertically) {
+      if (!this.shaderBuffersFlipVertically) {
+        this.shaderBuffersFlipVertically =
+            this.createBuffers(/* flipVertically= */ true);
+      }
+      return this.shaderBuffersFlipVertically;
+    } else {
+      if (!this.shaderBuffersPassthrough) {
+        this.shaderBuffersPassthrough =
+            this.createBuffers(/* flipVertically= */ false);
+      }
+      return this.shaderBuffersPassthrough;
+    }
+  }
+
+  private maybeInitGL(gl: WebGL2RenderingContext): void {
+    if (!this.gl) {
+      this.gl = gl;
+    } else if (gl !== this.gl) {
+      throw new Error('Cannot change GL context once initialized');
+    }
+  }
+
+  /** Runs the callback using the shader. */
+  run<T>(
+      gl: WebGL2RenderingContext, flipVertically: boolean,
+      callback: () => T): T {
+    this.maybeInitGL(gl);
+
+    if (!this.program) {
+      this.setupShaders();
+    }
+
+    const shaderBuffers = this.getShaderBuffers(flipVertically);
+    gl.useProgram(this.program!);
+    shaderBuffers.bind();
+    const result = callback();
+    shaderBuffers.unbind();
+
+    return result;
+  }
+
+  /**
+   * Binds a framebuffer to the canvas. If the framebuffer does not yet exist,
+   * creates it first. Binds the provided texture to the framebuffer.
+   */
+  bindFramebuffer(gl: WebGL2RenderingContext, texture: WebGLTexture): void {
+    this.maybeInitGL(gl);
+    if (!this.framebuffer) {
+      this.framebuffer =
+          assertNotNull(gl.createFramebuffer(), 'Failed to create framebuffer.');
+    }
+    gl.bindFramebuffer(gl.FRAMEBUFFER, this.framebuffer);
+    gl.framebufferTexture2D(
+        gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, texture, 0);
+  }
+
+  unbindFramebuffer(): void {
+    this.gl?.bindFramebuffer(this.gl.FRAMEBUFFER, null);
+  }
+
+  close() {
+    if (this.program) {
+      const gl = this.gl!;
+      gl.deleteProgram(this.program);
+      gl.deleteShader(this.vertexShader!);
+      gl.deleteShader(this.fragmentShader!);
+    }
+    if (this.framebuffer) {
+      this.gl!.deleteFramebuffer(this.framebuffer);
+    }
+    if (this.shaderBuffersPassthrough) {
+      this.shaderBuffersPassthrough.close();
+    }
+    if (this.shaderBuffersFlipVertically) {
+      this.shaderBuffersFlipVertically.close();
+    }
+  }
+}
diff --git a/mediapipe/tasks/web/vision/core/vision_task_runner.ts b/mediapipe/tasks/web/vision/core/vision_task_runner.ts
index 285dbf900..3ff6e0604 100644
--- a/mediapipe/tasks/web/vision/core/vision_task_runner.ts
+++ b/mediapipe/tasks/web/vision/core/vision_task_runner.ts
@@ -17,8 +17,9 @@
 import {NormalizedRect} from '../../../../framework/formats/rect_pb';
 import {TaskRunner} from '../../../../tasks/web/core/task_runner';
 import {WasmFileset} from '../../../../tasks/web/core/wasm_fileset';
-import {MPImage, MPImageShaderContext} from '../../../../tasks/web/vision/core/image';
+import {MPImage} from '../../../../tasks/web/vision/core/image';
 import {ImageProcessingOptions} from '../../../../tasks/web/vision/core/image_processing_options';
+import {MPImageShaderContext} from '../../../../tasks/web/vision/core/image_shader_context';
 import {GraphRunner, ImageSource, WasmMediaPipeConstructor} from '../../../../web/graph_runner/graph_runner';
 import {SupportImage, WasmImage} from '../../../../web/graph_runner/graph_runner_image_lib';
 import {isWebKit} from
'../../../../web/graph_runner/platform_utils';