Compare commits

...

394 Commits

Author SHA1 Message Date
Dmitri Gribenko
ec43bea176 No public description
PiperOrigin-RevId: 595039397
2024-01-02 00:01:27 -08:00
Copybara-Service
e23fa531e1 Merge pull request from kinaryml:python-holistic-landmarker
PiperOrigin-RevId: 594995636
2024-01-01 18:49:45 -08:00
MediaPipe Team
8609e5fae5 Adds warning about using Tensors::OpenGlBufferViews with multiple GlContexts.
PiperOrigin-RevId: 592924315
2023-12-21 12:29:11 -08:00
MediaPipe Team
cfb4465c38 Guard WaitOnGpu with extra OpenGL checks.
PiperOrigin-RevId: 592707594
2023-12-20 18:22:27 -08:00
MediaPipe Team
835ee5e354 No public description
PiperOrigin-RevId: 592683556
2023-12-20 16:16:50 -08:00
MediaPipe Team
52c1d44561 TypeScript: adding VideoFrame typings support to video input
PiperOrigin-RevId: 592640146
2023-12-20 13:16:55 -08:00
Sebastian Schmidt
1fa79195ec Rename Holistic's landmarksConfidence to presenceConfidence to match other APIs better
PiperOrigin-RevId: 592607279
2023-12-20 11:07:46 -08:00
Sebastian Schmidt
7edcba9fc0 Upgrade TypeScript to 5.3.3
PiperOrigin-RevId: 592599208
2023-12-20 10:38:57 -08:00
MediaPipe Team
1a88e75a37 No public description
PiperOrigin-RevId: 592472709
2023-12-20 00:58:34 -08:00
MediaPipe Team
42aa649aa6 Explicitly cast at callsite of WebGL context creation to avoid compilation errors with newer Emscripten versions.
PiperOrigin-RevId: 592409915
2023-12-19 19:45:08 -08:00
MediaPipe Team
473757c6cc No public description
PiperOrigin-RevId: 592358739
2023-12-19 15:22:26 -08:00
MediaPipe Team
7c3a5296ab Make DeletingFile movable.
PiperOrigin-RevId: 592332785
2023-12-19 13:48:20 -08:00
Sebastian Schmidt
5ee24d1662 Expose MediaPipe's ABSL and Sentencepiece as shared dependencies
PiperOrigin-RevId: 592256006
2023-12-19 09:21:57 -08:00
MediaPipe Team
4f4f107a8b Add Holistic Landmarker Web API.
PiperOrigin-RevId: 592006517
2023-12-18 14:17:57 -08:00
MediaPipe Team
91cfc691e4 Remove superfluous glFlush().
PiperOrigin-RevId: 591954599
2023-12-18 11:13:16 -08:00
Copybara-Service
e2ea358919 Merge pull request from priankakariatyml:ios-pose-landmarker-implementation
PiperOrigin-RevId: 591910121
2023-12-18 08:54:34 -08:00
Kinar
569c16db0b Documented HolisticLandmarkerResult 2023-12-18 04:37:29 -08:00
Kinar
cba0878652 Updated Copyright year 2023-12-18 02:51:11 -08:00
Kinar
a667949b99 Changed import style in task_info 2023-12-18 02:49:20 -08:00
Kinar
30e6b766d4 Revised implementation and added more tests 2023-12-18 02:47:28 -08:00
Kinar R
88463aeb9e
Merge branch 'google:master' into python-holistic-landmarker 2023-12-18 15:01:43 +05:30
Kinar R
c0606e819c
Update holistic_landmarker.py 2023-12-18 05:00:45 +05:30
Kinar R
00eb1f190f
Fix typo in holistic_landmarker_test.py 2023-12-18 04:57:25 +05:30
Kinar
24fe8eb73a Support both proto2 and proto3 in task subgraph options configuration, and revised the Holistic Landmarker API's implementation 2023-12-17 15:13:34 -08:00
Sebastian Schmidt
4b471266b8 No public description
PiperOrigin-RevId: 591370734
2023-12-15 15:10:53 -08:00
MediaPipe Team
41db137d37 No public description
PiperOrigin-RevId: 591351708
2023-12-15 14:00:39 -08:00
MediaPipe Team
e55caa234c No public description
PiperOrigin-RevId: 591148449
2023-12-14 22:25:41 -08:00
MediaPipe Team
28d5546d9d Internal change
PiperOrigin-RevId: 591121815
2023-12-14 19:48:57 -08:00
Copybara-Service
d6b8c2257b Merge pull request from kinaryml:c-face-landmarker-api
PiperOrigin-RevId: 591062403
2023-12-14 15:07:10 -08:00
MediaPipe Team
746d775933 Extend verifyGraph to be compatible with proto3.
PiperOrigin-RevId: 591047275
2023-12-14 14:12:36 -08:00
Kinar
5e75a169d3 Fix rows to columns for the field cols in struct Matrix 2023-12-14 09:02:05 -08:00
Kinar
f4bbfef674 Use memcpy now for copying data and indicate how the data is stored 2023-12-14 08:56:38 -08:00
Sebastian Schmidt
df7feadaf7 No public description
PiperOrigin-RevId: 590950821
2023-12-14 08:44:26 -08:00
MediaPipe Team
4004c2dfaa No public description
PiperOrigin-RevId: 590913209
2023-12-14 06:05:32 -08:00
Kinar
6fab3a8b85 Simplified copying data from Eigen matrix to C-style matrix and addressed some issues 2023-12-13 19:59:36 -08:00
Copybara-Service
04bcb0d2ee Merge pull request from priankakariatyml:ios-mpp-image-utils-pixel-buffer-format-fix
PiperOrigin-RevId: 590770032
2023-12-13 18:03:58 -08:00
MediaPipe Team
4e89de69a6 Version increment in version.bzl
PiperOrigin-RevId: 590658960
2023-12-13 11:19:06 -08:00
MediaPipe Team
4892209da9 No public description
PiperOrigin-RevId: 590629265
2023-12-13 09:49:59 -08:00
Sebastian Schmidt
15f2b32006 Expose MediaPipe's ABSL and Sentencepiece as shared dependencies
PiperOrigin-RevId: 590320902
2023-12-12 13:49:54 -08:00
Sebastian Schmidt
4237b765ce Internal
PiperOrigin-RevId: 590220032
2023-12-12 08:58:08 -08:00
MediaPipe Team
9a20d6b3e4 Create an explicit GlRuntimeException class
PiperOrigin-RevId: 590035213
2023-12-11 19:59:11 -08:00
Kinar
42b251cb8d Updated tests in face_landmarker_result_converter_test 2023-12-11 16:11:39 -08:00
Jiuqiang Tang
bd946db5a6 No public description
PiperOrigin-RevId: 589890130
2023-12-11 11:43:51 -08:00
Prianka Liz Kariat
c1cab6e9dc Merge branch 'master' into ios-mpp-image-utils-pixel-buffer-format-fix 2023-12-11 19:41:15 +05:30
Prianka Liz Kariat
04f826e9d3 Fixed function names in MPPImage Utils 2023-12-11 19:40:13 +05:30
Kinar
0200d32285 Changed Recognition to Detection 2023-12-11 05:20:48 -08:00
Kinar
fec4dff0d6 Added Face Landmarker C Tasks API and tests 2023-12-11 04:58:15 -08:00
MediaPipe Team
61efcf5a11 internal-only change
PiperOrigin-RevId: 589284167
2023-12-08 17:23:55 -08:00
Dmitri Gribenko
4e78e645d0 No public description
PiperOrigin-RevId: 589279414
2023-12-08 17:01:53 -08:00
Copybara-Service
faae68e81d Merge pull request from kinaryml:c-hand-landmarker-api
PiperOrigin-RevId: 588900627
2023-12-07 13:58:10 -08:00
Sebastian Schmidt
20743b8110 Update MediaPipe development version to 0.10.9
PiperOrigin-RevId: 588890763
2023-12-07 13:27:41 -08:00
Jiuqiang Tang
0a77b8c57b No public description
PiperOrigin-RevId: 588827865
2023-12-07 10:04:46 -08:00
MediaPipe Team
66655a15b2 API 2: Do not redirect from MEDIAPIPE_REGISTER_NODE to REGISTER_CALCULATOR
The problem with redirecting is that the calling code has to include API 1 code (often third_party/mediapipe/framework/calculator_framework.h), even though they are only using API 2 functionality (albeit deprecated). This can lead to weird issues, e.g. MakePacket<T> confusingly does not return a Packet<T>, but a Packet, because it's using the API 1 version.

PiperOrigin-RevId: 588798455
2023-12-07 08:20:28 -08:00
Kinar
6909504ca9 Fix naming in different files 2023-12-07 00:25:54 -08:00
MediaPipe Team
fad3785721 Export java package for hand_roi_refinement_graph_options.
PiperOrigin-RevId: 588537174
2023-12-06 14:08:46 -08:00
MediaPipe Team
78af80027a No user facing changes
PiperOrigin-RevId: 588501289
2023-12-06 12:05:21 -08:00
Sebastian Schmidt
0a3f27d1ce Move hand_roi_refinement_graph_options_java_proto_lite to vision lib
PiperOrigin-RevId: 588444225
2023-12-06 09:04:08 -08:00
Prianka Liz Kariat
1e04ec3cc2 Enabled stream mode on iOS pose landmarker 2023-12-06 21:40:08 +05:30
MediaPipe Team
e4a6ea3079 No public description
PiperOrigin-RevId: 588376739
2023-12-06 04:31:59 -08:00
Kinar
b5c1c11f6a Added Hand Landmarker C Tasks API and tests 2023-12-06 02:59:39 -08:00
Sebastian Schmidt
0f90ba17dc Use Java Proto Lite Target for Hand ROI Refinement proto
PiperOrigin-RevId: 588170664
2023-12-05 13:26:23 -08:00
Sebastian Schmidt
dad2626f91 No public description
PiperOrigin-RevId: 587850211
2023-12-04 14:58:44 -08:00
MediaPipe Team
b91ec031a2 Adding version.bzl for tracking version
PiperOrigin-RevId: 587767961
2023-12-04 10:31:42 -08:00
Prianka Liz Kariat
28d62d55ac Added iOS Pose Landmarker to CocoaPods build 2023-12-04 21:35:53 +05:30
Kinar
ea95ae753d Uncommented configuring options for Holistic Landmarker 2023-12-04 04:12:22 -08:00
Kinar
66f8625a42 Added Holistic Landmarker Python API 2023-12-04 04:10:34 -08:00
Dmitri Gribenko
3d8b715dd6 No public description
PiperOrigin-RevId: 587559637
2023-12-03 17:53:22 -08:00
MediaPipe Team
507d677d44 Internal change
PiperOrigin-RevId: 587325154
2023-12-02 09:47:33 -08:00
Sebastian Schmidt
f35ecb6c8b Add dependency on hand_roi_refinement_graph_options_proto
PiperOrigin-RevId: 587082550
2023-12-01 12:21:07 -08:00
Youchuan Hu
a0eda45baf Add TensorsToSegmentationCalculator test utilities.
PiperOrigin-RevId: 586817713
2023-11-30 16:17:17 -08:00
Sebastian Schmidt
7013b23785 No public description
PiperOrigin-RevId: 586813896
2023-11-30 16:06:16 -08:00
Sebastian Schmidt
3433ba083a Move LanguageDetectorResult converter to LanguageDetector task
PiperOrigin-RevId: 586812754
2023-11-30 16:01:14 -08:00
Copybara-Service
80e4e1599a Merge pull request from priankakariatyml:ios-pose-landmarker-implementation
PiperOrigin-RevId: 586811420
2023-11-30 15:53:32 -08:00
Sebastian Schmidt
2b53891a7c Add lib targets for all C vision tasks
PiperOrigin-RevId: 586805240
2023-11-30 15:37:35 -08:00
Copybara-Service
7d73a3e1fd Merge pull request from kinaryml:c-landmarker-apis
PiperOrigin-RevId: 586804764
2023-11-30 15:32:12 -08:00
Sebastian Schmidt
e5c7ebec12 Add libtext and libvision build rules
PiperOrigin-RevId: 586804071
2023-11-30 15:27:19 -08:00
Copybara-Service
53cd40cdd0 Merge pull request from priankakariatyml:ios-interactive-segmenter-containers
PiperOrigin-RevId: 586779717
2023-11-30 14:05:42 -08:00
Kinar
9a5aa1b360 Refactor GestureRecognizerResult conversion for default initialization 2023-11-30 09:13:10 -08:00
Prianka Liz Kariat
90622475a2 Added iOS interactive segmenter header 2023-11-30 02:32:08 +05:30
Prianka Liz Kariat
f5ac0637a2 Updated iOS vision/core to add methods for processing region of interest 2023-11-30 02:31:48 +05:30
Prianka Liz Kariat
4c02980b3f Added iOS region of interest helpers 2023-11-30 02:31:11 +05:30
Prianka Liz Kariat
4137dbcbf5 Added iOS region of interest 2023-11-30 02:30:42 +05:30
Prianka Liz Kariat
3532503354 Added iOS interactive segmenter options 2023-11-30 02:05:23 +05:30
Copybara-Service
bb4906bcd3 Merge pull request from kinaryml:python-vision-benchmark-scripts
PiperOrigin-RevId: 586349225
2023-11-29 08:27:59 -08:00
Kinar
d19d5a50be Added FreeMemory test for GestureRecognizerResult 2023-11-29 03:17:35 -08:00
Kinar R
6ed5e3d0df
Merge branch 'master' into c-landmarker-apis 2023-11-29 16:41:12 +05:30
Kinar
3c655e2334 Revised Gesture Recognizer API implementation and associated tests 2023-11-29 03:08:09 -08:00
MediaPipe Team
91589b10d3 internal change.
PiperOrigin-RevId: 586156439
2023-11-28 18:07:30 -08:00
Sebastian Schmidt
62bafd39bb HolisticLandmarker Java API
PiperOrigin-RevId: 586113048
2023-11-28 15:00:54 -08:00
Sebastian Schmidt
a898215c52 Holistic Landmarker C++ Graph
PiperOrigin-RevId: 586105983
2023-11-28 14:36:19 -08:00
MediaPipe Team
95601ff98b Remove internal logs.
PiperOrigin-RevId: 585782033
2023-11-27 15:51:21 -08:00
Copybara-Service
6fa9848a15 Merge pull request from amacsmith:master
PiperOrigin-RevId: 585775506
2023-11-27 15:25:00 -08:00
Copybara-Service
090e74a0aa Merge pull request from kinaryml:c-object-detector-api
PiperOrigin-RevId: 585765946
2023-11-27 14:51:35 -08:00
MediaPipe Team
1ff7e95295 No public description
PiperOrigin-RevId: 585719403
2023-11-27 12:01:38 -08:00
Sebastian Schmidt
8d57a9e2e8 Add missing export declarations to DrawingUtils
Fixes https://github.com/google/mediapipe/issues/4980

PiperOrigin-RevId: 585705106
2023-11-27 11:16:23 -08:00
Kinar
17c0c960be Added Gesture Recognizer C API and tests 2023-11-27 04:51:32 -08:00
Alex Macdonald-Smith
5ca859f90b
Updated mediapipe/mediapipe/tasks/web/vision/README.md
There was a typo in the url referencing Gesture Recognizer

```
const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
    "hhttps://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task"
);
```

changed to 
```
const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
    "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task"
);
```

The extra 'h' was dropped.

Let me know if there are any more updates needed for this.
2023-11-21 16:03:57 -05:00
Tony Allevato
e7edd97eff Internal change.
PiperOrigin-RevId: 584399894
2023-11-21 12:33:58 -08:00
MediaPipe Team
0d298d7a67 No public description
PiperOrigin-RevId: 584349220
2023-11-21 09:39:09 -08:00
Prianka Liz Kariat
447f9cc452 Fixed formatting of MPPPoseLandmarkerTests.mm 2023-11-20 22:37:43 +05:30
Prianka Liz Kariat
972e3d81c0 Added iOS Objective C Pose Landmarker Tests 2023-11-20 22:33:58 +05:30
Matt Kreileder
d8fd986517 No public description
PiperOrigin-RevId: 583973946
2023-11-20 03:32:39 -08:00
MediaPipe Team
5cd3037443 Adding a GpuTestWithParamBase test class to support value parameterized tests
PiperOrigin-RevId: 583967017
2023-11-20 03:02:22 -08:00
MediaPipe Team
bd4be30b02 No public description
PiperOrigin-RevId: 583936442
2023-11-20 00:59:42 -08:00
MediaPipe Team
42d42a5ea1 Ensure that releaseGl() is called if prepareGl throws
Without this logic, we might have resources created within prepareGl() leaking, since they will never be released.

PiperOrigin-RevId: 583491569
2023-11-17 14:52:34 -08:00
Sebastian Schmidt
d29ea119ff Add the result class for the HolisticLandmarker Java API
PiperOrigin-RevId: 583426528
2023-11-17 10:29:42 -08:00
Sebastian Schmidt
9456c64830 No public description
PiperOrigin-RevId: 583417701
2023-11-17 10:02:53 -08:00
Kinar
6bdc7ce016 Removed unused param 2023-11-16 16:39:21 -08:00
Kinar
46c6c9403c Code cleanup and revised benchmarking API 2023-11-16 16:26:29 -08:00
MediaPipe Team
e7c7638833 No public description
PiperOrigin-RevId: 583186277
2023-11-16 15:38:42 -08:00
Kinar
8f32fda6d8 Added more benchmark scripts for the Tasks Python API 2023-11-16 12:53:36 -08:00
Kinar
b879e3a204 Updated components and their tests in the C Tasks API 2023-11-16 10:05:34 -08:00
Copybara-Service
7287056674 Merge pull request from kinaryml:python-image-classifier-bench
PiperOrigin-RevId: 583072872
2023-11-16 09:38:05 -08:00
MediaPipe Team
f13c6974ee Extract CPU conversion methods into a separate library & add test
PiperOrigin-RevId: 582966041
2023-11-16 02:34:22 -08:00
Sebastian Schmidt
12340a8e82 Use gl.LINEAR interpolation for confidence masks
PiperOrigin-RevId: 582777383
2023-11-15 13:02:14 -08:00
Sebastian Schmidt
47e217896c Add drawConfidenceMask() to our public API
PiperOrigin-RevId: 582647409
2023-11-15 06:12:12 -08:00
MediaPipe Team
e440a4da56 Explicitly delete some copy operations to improve compile errors.
PiperOrigin-RevId: 582595026
2023-11-15 02:23:05 -08:00
Kinar
252cca72e7 Allowed a default value for the model argument 2023-11-13 21:44:27 -08:00
Kinar
f8add5ad42 Documented the return value and added percentile to argparser 2023-11-13 21:17:28 -08:00
Sebastian Schmidt
a38467bae0 Internal
PiperOrigin-RevId: 582098762
2023-11-13 15:15:53 -08:00
Youchuan Hu
71e9929f60 Refactor OpenCV path out of TensorsToSegmentationCalculator main file.
ProcessCpu() is changed into an OpenCV converter that is owned by the calculator. The calculator should call converter.Convert() to get the conversion result.

PiperOrigin-RevId: 582010350
2023-11-13 10:33:07 -08:00
Kinar
1c860cace6 Added files for the Object Detector C Tasks API 2023-11-13 09:53:37 -08:00
Sebastian Schmidt
d504d3bf22 Create shared utilities to construct landmark lists
PiperOrigin-RevId: 581970043
2023-11-13 08:24:19 -08:00
Kinar
38737849e6 Updated copyright 2023-11-11 03:34:57 -08:00
Kinar
99c8b9ee3c Updated copyright 2023-11-11 03:34:26 -08:00
Kinar
021c7edde7 Updated README and script 2023-11-11 03:32:45 -08:00
Kinar
35f2f36733 Added image classifier benchmark 2023-11-11 03:25:36 -08:00
MediaPipe Team
939a9c2a37 No public description
PiperOrigin-RevId: 581469194
2023-11-10 23:59:21 -08:00
MediaPipe Team
ad4da8c9cc No public description
PiperOrigin-RevId: 581468467
2023-11-10 23:53:57 -08:00
MediaPipe Team
418680936d No public description
PiperOrigin-RevId: 581450685
2023-11-10 21:55:41 -08:00
MediaPipe Team
4ad67abd70 ...internal change...
PiperOrigin-RevId: 581375224
2023-11-10 14:46:01 -08:00
MediaPipe Team
5dec91226d No public description
PiperOrigin-RevId: 581322099
2023-11-10 11:39:24 -08:00
MediaPipe Team
64b21d758e Remove batch dimension from the output of tflite_with_tokenizer in text classifier.
PiperOrigin-RevId: 581292824
2023-11-10 10:05:40 -08:00
MediaPipe Team
d772bf8134 Add BinaryAUC metric and Best Checkpoint callback to Text Classifier
PiperOrigin-RevId: 581276382
2023-11-10 09:04:26 -08:00
MediaPipe Team
fd4859c178 Refactor OpenCV path out of TensorsToSegmentationCalculator main file.
ProcessCpu() is changed into an OpenCV converter that is owned by the calculator. The calculator should call converter.Convert() to get the conversion result.

PiperOrigin-RevId: 581103226
2023-11-09 20:06:47 -08:00
Youchuan Hu
1038f8176d Refactor OpenCV path out of TensorsToSegmentationCalculator main file.
ProcessCpu() is changed into an OpenCV converter that is owned by the calculator. The calculator should call converter.Convert() to get the conversion result.

PiperOrigin-RevId: 581073731
2023-11-09 17:28:24 -08:00
MediaPipe Team
333125ac20 Add some convenience getters to EglManager.
PiperOrigin-RevId: 581049412
2023-11-09 15:58:06 -08:00
Sebastian Schmidt
edca85c5d3 Create shared utilities to construct category lists
PiperOrigin-RevId: 581009898
2023-11-09 13:38:21 -08:00
MediaPipe Team
6532ce5c59 Refactor OpenCV path out of TensorsToSegmentationCalculator main file.
ProcessCpu() is changed into an OpenCV converter that is owned by the calculator. The calculator should call converter.Convert() to get the conversion result.

PiperOrigin-RevId: 580937591
2023-11-09 09:45:10 -08:00
MediaPipe Team
a9a169372a Fixes multiple typos in the calculator's internal files.
PiperOrigin-RevId: 580907788
2023-11-09 08:02:54 -08:00
MediaPipe Team
7c5c216652 Exposes a handle to AHardwareBuffers through a new GpuBuffer view
PiperOrigin-RevId: 580754933
2023-11-08 20:06:35 -08:00
MediaPipe Team
252c7eef25 Add option to omit the checkpoint callback in text classifier.
PiperOrigin-RevId: 580658724
2023-11-08 14:30:00 -08:00
Youchuan Hu
ae606c1550 Refactor OpenCV path out of TensorsToSegmentationCalculator main file.
ProcessCpu() is changed into an OpenCV converter that is owned by the calculator. The calculator should call converter.Convert() to get the conversion result.

PiperOrigin-RevId: 580625461
2023-11-08 12:53:52 -08:00
Copybara-Service
d4d30768be Merge pull request from kinaryml:c-image-embedder-api
PiperOrigin-RevId: 580618718
2023-11-08 12:35:05 -08:00
MediaPipe Team
000314a545 No public description
PiperOrigin-RevId: 580614241
2023-11-08 12:17:12 -08:00
Copybara-Service
65e74dde0f Merge pull request from priankakariatyml:ios-pose-landmarker-implementation
PiperOrigin-RevId: 580578919
2023-11-08 10:39:24 -08:00
Copybara-Service
8d4407b04e Merge pull request from priankakariatyml:ios-language-detector-tests
PiperOrigin-RevId: 580577290
2023-11-08 10:34:06 -08:00
MediaPipe Team
81a07e2e32 No public description
PiperOrigin-RevId: 580504831
2023-11-08 05:53:54 -08:00
MediaPipe Team
6ea6f28250 Creates GpuBuffers around pre-allocated AHardware_Buffer objects.
PiperOrigin-RevId: 580358465
2023-11-07 18:01:37 -08:00
Kinar
c442d6117e Resolved issues and added a common header to hold all the necessary structures for the vision tasks 2023-11-07 14:23:15 -08:00
MediaPipe Team
c375761480 No public description
PiperOrigin-RevId: 580292393
2023-11-07 14:02:30 -08:00
Sebastian Schmidt
8d370f4f5b Remove const from input types of C API
PiperOrigin-RevId: 580217902
2023-11-07 10:08:51 -08:00
Kinar
197358dfee Drop default arguments in Image Embedder C API 2023-11-07 07:34:08 -08:00
Kinar
60fcfa74cc Fixed some typos in the error message 2023-11-07 07:26:57 -08:00
Kinar
b0725b46fb Fixed merge conflicts 2023-11-07 07:12:58 -08:00
Kinar R
42a916ad4f
Merge branch 'master' into c-image-embedder-api 2023-11-07 20:37:09 +05:30
Kinar
d9080c0d38 Updated the Image Embedder C API and added tests for cosine similarity 2023-11-07 07:02:08 -08:00
Prianka Liz Kariat
9d9a5dc5e7 Added iOS language detector tests 2023-11-07 11:15:01 +05:30
Prianka Liz Kariat
b5b0d6eee7 Fixed graph name in iOS language detector 2023-11-07 11:13:25 +05:30
Prianka Liz Kariat
32571a37d2 Added pose landmarker protobuf utils 2023-11-07 09:49:53 +05:30
Prianka Liz Kariat
91095c2d6a Added null check for segmentation masks in pose landmarker helper initializer 2023-11-07 09:49:42 +05:30
Prianka Liz Kariat
1d0f3734b4 Added iOS MPPPoseLandmarker.mm 2023-11-07 09:46:39 +05:30
Sebastian Schmidt
2abaabce0e Drop default arguments in C API
PiperOrigin-RevId: 579965820
2023-11-06 14:55:41 -08:00
MediaPipe Team
a8d88bf7cf Creates GpuBuffers around pre-allocated AHardware_Buffer objects.
PiperOrigin-RevId: 579961642
2023-11-06 14:42:13 -08:00
Sebastian Schmidt
077b52250d Pass Model Asset Buffer as byte array + length
PiperOrigin-RevId: 579944283
2023-11-06 13:42:40 -08:00
Kinar R
3b122a1e61
Merge branch 'google:master' into c-image-embedder-api 2023-11-07 02:00:23 +05:30
MediaPipe Team
5f0d24d741 Fixes typo in GlCalculatorHelper::UpdateContract argument name
PiperOrigin-RevId: 579832146
2023-11-06 07:08:33 -08:00
MediaPipe Team
0b53c9752f Fixes multiple typos in the calculator's internal files.
PiperOrigin-RevId: 579718764
2023-11-05 20:49:16 -08:00
MediaPipe Team
e22b7d5dd4 Example updated for mp.Image in documentation
PiperOrigin-RevId: 579277510
2023-11-03 12:58:47 -07:00
Sebastian Schmidt
1c46e43088 Update WASM files for 0.10.8 release
PiperOrigin-RevId: 579032432
2023-11-02 18:00:54 -07:00
Sebastian Schmidt
8f564c4b7b Allow OffscreenCanvas to be used by DrawingUtils
PiperOrigin-RevId: 579021013
2023-11-02 17:03:07 -07:00
Copybara-Service
1cc79001f4 Merge pull request from priankakariatyml:ios-image-segmenter-cocoapods-build
PiperOrigin-RevId: 578993411
2023-11-02 15:24:59 -07:00
Copybara-Service
1b8a0ee6af Merge pull request from priankakariatyml:ios-pose-landmarker-impl
PiperOrigin-RevId: 578991151
2023-11-02 15:23:04 -07:00
Copybara-Service
35b9453da4 Merge pull request from google:mrschmidt/gpu
PiperOrigin-RevId: 578991016
2023-11-02 15:18:10 -07:00
Copybara-Service
b4ce39cbf7 Merge pull request from priankakariatyml:ios-image-utils-fix
PiperOrigin-RevId: 578990990
2023-11-02 15:15:58 -07:00
Copybara-Service
35010894c1 Merge pull request from priankakariatyml:ios-language-detector
PiperOrigin-RevId: 578990815
2023-11-02 15:10:37 -07:00
MediaPipe Team
9018ca699b Creates GpuBuffers around pre-allocated AHardware_Buffer objects.
PiperOrigin-RevId: 578850184
2023-11-02 07:46:51 -07:00
MediaPipe Team
f8197651e8 Add AT_FIRST_TICK processing to SidePacketToStreamCalculator.
PiperOrigin-RevId: 578824863
2023-11-02 05:58:35 -07:00
Sebastian Schmidt
e81fc5d0aa Access document via self.document
PiperOrigin-RevId: 578635298
2023-11-01 14:08:09 -07:00
Sebastian Schmidt
9474394768 Add drawCategoryMask() to our public API
PiperOrigin-RevId: 578526413
2023-11-01 08:33:18 -07:00
MediaPipe Team
3a55f1156a No public description
PiperOrigin-RevId: 578496866
2023-11-01 06:21:05 -07:00
MediaPipe Team
c6aa9cbaef No public description
PiperOrigin-RevId: 578303180
2023-10-31 14:29:38 -07:00
MediaPipe Team
35f2f98a1c No public description
PiperOrigin-RevId: 578266141
2023-10-31 12:30:04 -07:00
MediaPipe Team
b9ff9708e3 Upgrade to use Gradle 8.4
PiperOrigin-RevId: 578259506
2023-10-31 12:07:37 -07:00
MediaPipe Team
a4048eee11 Add video and live stream processing and tests for Image Classifier C API
PiperOrigin-RevId: 578242391
2023-10-31 11:14:28 -07:00
MediaPipe Team
7da2810b83 Move filtering logic of score to ConvertToDetection.
PiperOrigin-RevId: 578189518
2023-10-31 08:20:03 -07:00
Sebastian Schmidt
95692c64a9 Add GPU support 2023-10-30 16:05:12 -06:00
Sebastian Schmidt
ec032fb018 Use SRGBA for Mac on Python for image test
PiperOrigin-RevId: 577931014
2023-10-30 13:07:10 -07:00
Sebastian Schmidt
7256bd2638 No public description
PiperOrigin-RevId: 577873616
2023-10-30 09:46:16 -07:00
MediaPipe Team
2f4d7b4079 No public description
PiperOrigin-RevId: 577410310
2023-10-28 01:10:02 -07:00
MediaPipe Team
a96581e3b7 TensorsToDetectionsCalculator supports multiple classes for a bbox.
PiperOrigin-RevId: 577300797
2023-10-27 14:13:02 -07:00
Sebastian Schmidt
d73ef24406 Support 3-channel RGB images for Mac Python
PiperOrigin-RevId: 577240413
2023-10-27 10:34:08 -07:00
MediaPipe Team
eaf0807849 Fixes multiple typos in the calculator's internal files.
PiperOrigin-RevId: 577202836
2023-10-27 08:08:52 -07:00
MediaPipe Team
46cca0d486 Rolling back.
PiperOrigin-RevId: 577128565
2023-10-27 02:25:42 -07:00
Sebastian Schmidt
2cb0100fe6 Use mp.ImageFormat instead of just ImageFormat
Fixes https://github.com/google/mediapipe/issues/4911

PiperOrigin-RevId: 577003083
2023-10-26 15:32:28 -07:00
MediaPipe Team
5459705038 Adding two new immutable texture GpuBufferFormat types
PiperOrigin-RevId: 577002534
2023-10-26 15:27:43 -07:00
Youchuan Hu
e7121e4feb Use designated initializers for TensorsToSegmentationCalculator tests.
PiperOrigin-RevId: 576671943
2023-10-25 16:02:28 -07:00
Sebastian Schmidt
a277d853ea Don't drop status message in ConvertFromImageFrame
PiperOrigin-RevId: 576667666
2023-10-25 15:46:41 -07:00
MediaPipe Team
3017c02d3d No public description
PiperOrigin-RevId: 576663264
2023-10-25 15:30:54 -07:00
Copybara-Service
06dab1e526 Merge pull request from markmcd:ios-docgen
PiperOrigin-RevId: 576628799
2023-10-25 14:00:39 -07:00
MediaPipe Team
496a6ed809 No public description
PiperOrigin-RevId: 576314429
2023-10-24 16:08:19 -07:00
Sebastian Schmidt
5f2b9fd765 Speed up Python build by only building binary graph
PiperOrigin-RevId: 576260883
2023-10-24 13:41:17 -07:00
Sebastian Schmidt
c698414c71 Use cc_library for DrishtiMetalHelper
PiperOrigin-RevId: 576230898
2023-10-24 12:40:15 -07:00
Youchuan Hu
905a18c88c Add CPU tests for TensorsToSegmentationCalculator
PiperOrigin-RevId: 576208735
2023-10-24 11:35:58 -07:00
Sebastian Schmidt
5b0f1f9ac4 No public description
PiperOrigin-RevId: 576166645
2023-10-24 09:34:24 -07:00
MediaPipe Team
543b595971 Fix internal inconsistency in parsing code
PiperOrigin-RevId: 576094494
2023-10-24 04:27:18 -07:00
Sebastian Schmidt
05564cbe9a No public description
PiperOrigin-RevId: 575930740
2023-10-23 14:36:03 -07:00
Sebastian Schmidt
aedafd63f9 Remove objc_library from Python build path for Mac GPU build
Addresses https://github.com/bazelbuild/bazel/issues/19912

PiperOrigin-RevId: 575896231
2023-10-23 12:36:14 -07:00
Copybara-Service
a39df33664 Merge pull request from kinaryml:face-stylizer-adding-unit-tests-to-api
PiperOrigin-RevId: 575895855
2023-10-23 12:31:33 -07:00
Sebastian Schmidt
b904ade0cf Allow Mac to use GPU Delegate
PiperOrigin-RevId: 575882254
2023-10-23 11:44:02 -07:00
Sebastian Schmidt
6aa27d9aeb Initialize GPU support for Python Task API
PiperOrigin-RevId: 575842513
2023-10-23 09:39:16 -07:00
Prianka Liz Kariat
d5a1bc03af Fixed deletion of iOS output MPImage buffer in MPImage Utils 2023-10-23 20:28:43 +05:30
Prianka Liz Kariat
7c45bc802f Added iOS Image Segmenter to CocoaPods build 2023-10-23 20:02:57 +05:30
Prianka Liz Kariat
305f076c7f Fixed extra condition check in iOS Image Segmenter Result Helper 2023-10-23 20:02:39 +05:30
Kinar
4b3cb5b758 Added files for the Image Embedder C API and tests 2023-10-23 00:30:51 -07:00
MediaPipe Team
0dee33ccba No public description
PiperOrigin-RevId: 575477678
2023-10-21 10:26:09 -07:00
Prianka Liz Kariat
3a43aff13c Added iOS language detector implementation 2023-10-21 10:42:48 +05:30
Prianka Liz Kariat
f185bc6635 Added language detector result helpers 2023-10-21 10:42:32 +05:30
Prianka Liz Kariat
c48a5668b8 Updated documentation 2023-10-21 03:57:55 +05:30
Prianka Liz Kariat
96ed3a7422 Added iOS pose landmarker header 2023-10-21 03:55:00 +05:30
Prianka Liz Kariat
3622ff9bff Added iOS pose landmarks connections 2023-10-21 03:53:54 +05:30
Prianka Liz Kariat
c4315c500d Added pose landmarker result helpers 2023-10-21 03:52:46 +05:30
MediaPipe Team
8fc3a0473f Add scaling support to surface view renderer.
PiperOrigin-RevId: 575134648
2023-10-20 01:08:17 -07:00
Sebastian Schmidt
305d7abec4 Add a field to GPUBuffer C struct so FFIGen can handle it
PiperOrigin-RevId: 575020084
2023-10-19 15:29:44 -07:00
Copybara-Service
1601073cf0 Merge pull request from priankakariatyml:ios-image-segmenter-populate-labels
PiperOrigin-RevId: 574977633
2023-10-19 13:07:53 -07:00
Copybara-Service
1c7ea02b0e Merge pull request from priankakariatyml:ios-image-segmenter-documentation-updates
PiperOrigin-RevId: 574966608
2023-10-19 12:32:23 -07:00
Sebastian Schmidt
5779f5e9da Allow GPU Origin Proto to be build by Maven
PiperOrigin-RevId: 574966597
2023-10-19 12:30:38 -07:00
Copybara-Service
02e0ce3f87 Merge pull request from priankakariatyml:ios-language-detector-containers
PiperOrigin-RevId: 574954531
2023-10-19 11:58:54 -07:00
Copybara-Service
ddf46a2a61 Merge pull request from priankakariatyml:ios-pose-landmarker-containers
PiperOrigin-RevId: 574954344
2023-10-19 11:54:25 -07:00
Sebastian Schmidt
2d0d258403 Delete arm64 only file in Mac wheel
Fixes https://github.com/google/mediapipe/issues/4888#issuecomment-1768861583

PiperOrigin-RevId: 574938905
2023-10-19 11:16:28 -07:00
Sebastian Schmidt
66570c3dfc No public description
PiperOrigin-RevId: 574938418
2023-10-19 11:11:52 -07:00
Fergus Henderson
f9fa7cfbeb No public description
PiperOrigin-RevId: 574913082
2023-10-19 10:10:26 -07:00
Prianka Liz Kariat
06d893a9f9 Revert "Updated deletion in FreeDataProviderReleaseCallback"
This reverts commit 69b7a21368.
2023-10-19 20:45:32 +05:30
Prianka Liz Kariat
69b7a21368 Updated deletion in FreeDataProviderReleaseCallback 2023-10-19 20:42:06 +05:30
Prianka Liz Kariat
af9a7e7e40 Added documentation 2023-10-19 20:27:51 +05:30
Prianka Liz Kariat
b9c869494d Fixed formatting of MPPImage+Utils.mm 2023-10-19 19:59:59 +05:30
Prianka Liz Kariat
4668d683d5 Updated implementation of MPPImage Utils to reduce lines of code 2023-10-19 19:59:09 +05:30
Prianka Liz Kariat
ad68122069 Added support for creating CVPixelBuffer from C++ Images to iOS MPPImage Utils 2023-10-19 19:58:40 +05:30
Prianka Liz Kariat
032d7a5d22 Removed support for CVPixelBuffer of type 32RGBA 2023-10-19 19:56:44 +05:30
Prianka Liz Kariat
0fe677b78f Updated supported pixel formats in iOS image classifier Documentation 2023-10-19 19:24:40 +05:30
MediaPipe Team
7dca7ad24e Internal change.
PiperOrigin-RevId: 574777627
2023-10-19 01:50:24 -07:00
MediaPipe Team
259fa86c62 Add implementation and tests for Image Classifier C API
PiperOrigin-RevId: 574679661
2023-10-18 18:57:19 -07:00
Sebastian Schmidt
364048daca Allow Python to be build on Mac with GPU support
PiperOrigin-RevId: 574625520
2023-10-18 15:45:17 -07:00
Sebastian Schmidt
4f29ffcc3e Add GPU Origin proto to Java Tasks Library
PiperOrigin-RevId: 574535005
2023-10-18 11:18:46 -07:00
MediaPipe Team
06cc6d1546 No public description
PiperOrigin-RevId: 574528013
2023-10-18 10:58:53 -07:00
MediaPipe Team
2bd6726c89 Plumb an optional default Executor and set of input side packets
through TaskApiFactory::Create so that consumers of that API
can provide these inputs to their underlying graph.

PiperOrigin-RevId: 574503266
2023-10-18 09:51:43 -07:00
MediaPipe Team
e27bbf15dc No public description
PiperOrigin-RevId: 574497996
2023-10-18 09:46:36 -07:00
MediaPipe Team
d006304f6a Migrate ParseTagAndName to use absl::string_view
PiperOrigin-RevId: 574492000
2023-10-18 09:44:39 -07:00
Youchuan Hu
de1b1b6b97 Initial test for TensorsToSegmentationCalculator
This test is a pass-through with no modification of the input tensor. CPU test.

PiperOrigin-RevId: 574210865
2023-10-17 11:22:03 -07:00
MediaPipe Team
dd215e00f5 No public description
PiperOrigin-RevId: 574045894
2023-10-17 00:11:39 -07:00
MediaPipe Team
2e11444f5c Introduce FixGraphBackEdges utils function.
PiperOrigin-RevId: 573925628
2023-10-16 14:13:18 -07:00
MediaPipe Team
a1e1b5d34c Internal change.
PiperOrigin-RevId: 573318330
2023-10-13 14:25:27 -07:00
MediaPipe Team
8993073f35 Internal change
PiperOrigin-RevId: 573254750
2023-10-13 10:25:00 -07:00
MediaPipe Team
1bd800697e GPU_ORIGIN configurable through base options proto.
PiperOrigin-RevId: 573251085
2023-10-13 10:10:52 -07:00
MediaPipe Team
8823046e4b Add check to avoid doing illegal memory access from an invalid iterator from std::prev()
PiperOrigin-RevId: 573248334
2023-10-13 10:02:09 -07:00
MediaPipe Team
652792ebaa Internal change
PiperOrigin-RevId: 573228351
2023-10-13 08:31:17 -07:00
MediaPipe Team
61dc7281e2 No public description
PiperOrigin-RevId: 573107636
2023-10-12 22:39:54 -07:00
MediaPipe Team
2a286cc790 Introduce AlignHandToPoseInWorldCalculator
PiperOrigin-RevId: 572959145
2023-10-12 11:14:03 -07:00
MediaPipe Team
ac2d5cedbd No public description
PiperOrigin-RevId: 572742286
2023-10-11 18:12:09 -07:00
Sebastian Schmidt
a97eaad10f No public description
PiperOrigin-RevId: 572722787
2023-10-11 16:41:08 -07:00
MediaPipe Team
dd29666296 Adding vector versions of input calls to TS GraphRunner API
PiperOrigin-RevId: 572711430
2023-10-11 15:55:08 -07:00
Sebastian Schmidt
4b8fd3b2d0 No public description
PiperOrigin-RevId: 572628807
2023-10-11 11:14:07 -07:00
Copybara-Service
84f6959f9d Merge pull request from kinaryml:c-language-detector-api
PiperOrigin-RevId: 572385111
2023-10-10 15:37:06 -07:00
MediaPipe Team
d6d92354ea Detection postprocessing support quantized tensor.
PiperOrigin-RevId: 572310272
2023-10-10 11:13:54 -07:00
Sebastian Schmidt
df13788883 No public description
PiperOrigin-RevId: 572309070
2023-10-10 11:08:47 -07:00
Sebastian Schmidt
0d5f35d351 No public description
PiperOrigin-RevId: 572292160
2023-10-10 10:23:35 -07:00
Sebastian Schmidt
dc63a5401c No public description
PiperOrigin-RevId: 572266397
2023-10-10 09:02:48 -07:00
Kinar R
91c5f84f9c Removed language_detection_result and moved the necessary containers to language_detector.h 2023-10-10 13:23:25 +05:30
MediaPipe Team
ef6e712a88 Internal change
PiperOrigin-RevId: 572125477
2023-10-09 21:28:52 -07:00
MediaPipe Team
f72542ae5d Internal change
PiperOrigin-RevId: 572122579
2023-10-09 21:11:24 -07:00
MediaPipe Team
ac954215cf Internal change
PiperOrigin-RevId: 572121726
2023-10-09 21:06:32 -07:00
MediaPipe Team
3adc068e97 Add OnCameraBoundListener and support for landscape orientation to CameraXPreviewHelper
PiperOrigin-RevId: 572054649
2023-10-09 15:21:46 -07:00
MediaPipe Team
3dd6480705 No public description
PiperOrigin-RevId: 572032324
2023-10-09 13:59:35 -07:00
Prianka Liz Kariat
3a97762569 Fixed typo in iOS image segmenter Swift delegate name 2023-10-09 17:48:57 +05:30
Prianka Liz Kariat
6c4b4469ae Updated iOS Image Segmenter documentation to use Swift names 2023-10-09 17:48:39 +05:30
Prianka Liz Kariat
fce7b19ad7 Added a test for getting labels from iOS image segmenter 2023-10-09 17:47:03 +05:30
Prianka Liz Kariat
dd823d16f8 Added property to get labels from iOS Image Segmenter 2023-10-09 17:46:34 +05:30
Sebastian Schmidt
69fe645c43 Update WASM files for 0.10.7 release
PiperOrigin-RevId: 571440444
2023-10-06 14:51:20 -07:00
Chris McClanahan
b503d71be4 No public description
PiperOrigin-RevId: 571412955
2023-10-06 13:00:09 -07:00
Kinar
882ec323f0 Added files for the Language Detector C API and tests 2023-10-06 11:39:23 -07:00
Copybara-Service
830ee092b9 Merge pull request from priankakariatyml:ios-image-segmenter-basic-tests
PiperOrigin-RevId: 571386542
2023-10-06 11:18:47 -07:00
Sebastian Schmidt
90e6a97b22 Fix WasmFileset compilation issue in 3P build
PiperOrigin-RevId: 571345702
2023-10-06 08:36:53 -07:00
Matt Kreileder
7389119a2e No public description
PiperOrigin-RevId: 571296889
2023-10-06 04:15:09 -07:00
MediaPipe Team
44a4dad58e Internal change
PiperOrigin-RevId: 571232374
2023-10-05 22:49:06 -07:00
MediaPipe Team
b3f9587bc2 Add stream API merge utils.
PiperOrigin-RevId: 571124981
2023-10-05 14:09:12 -07:00
Sebastian Schmidt
a1e542fc16 No public description
PiperOrigin-RevId: 571109389
2023-10-05 13:15:31 -07:00
Copybara-Service
24da737272 Merge pull request from priankakariatyml:ios-image-utils-updates
PiperOrigin-RevId: 571087863
2023-10-05 11:53:31 -07:00
Copybara-Service
d686b42b85 Merge pull request from kinaryml:c-text-embedder-api
PiperOrigin-RevId: 571065228
2023-10-05 10:43:30 -07:00
Kinar
ebfd7284c9 Fixed some issues with documentation 2023-10-05 04:25:18 -07:00
Kinar
92e13d43e4 Resolved some issues 2023-10-05 03:54:54 -07:00
MediaPipe Team
edc4db287c No public description
PiperOrigin-RevId: 570904787
2023-10-04 22:18:50 -07:00
MediaPipe Team
7ab3d70aa4 Add stream API presence utils.
PiperOrigin-RevId: 570901832
2023-10-04 21:59:05 -07:00
Nevena Kotlaja
2dd20822be No public description
PiperOrigin-RevId: 570789405
2023-10-04 13:33:51 -07:00
Sebastian Schmidt
1d8bd9c3ee No public description
PiperOrigin-RevId: 570765754
2023-10-04 12:14:31 -07:00
Daniel Cheng
d2baba6dbb Internal change
PiperOrigin-RevId: 570745425
2023-10-04 11:09:36 -07:00
MediaPipe Team
3b99f8d9dd Introduce SetJointsVisibilityCalculator
PiperOrigin-RevId: 570745171
2023-10-04 11:04:40 -07:00
MediaPipe Team
c81624d7b2 Introduce CombineJointsCalculator
PiperOrigin-RevId: 570739088
2023-10-04 10:45:11 -07:00
Daniel Cheng
7f1c17065a Prefix status macro implementation with MP_.
This makes it less likely for the implementation to conflict with other
ASSIGN_OR_RETURN() and RETURN_IF_ERROR() implementations.

PiperOrigin-RevId: 570726994
2023-10-04 10:04:59 -07:00
MediaPipe Team
9bb042cc86 GlSurfaceViewRenderer: Capture graph output texture
Captures the original graph output texture, not what has been copied to the screen. This will be important for zooming to prevent top/bottom letterboxes, and it preserves the original quality, so that high resolution images can be used.

PiperOrigin-RevId: 570604422
2023-10-04 00:07:13 -07:00
Prianka Liz Kariat
da7013c746 Updated error messages in MPPImage Utils 2023-10-03 23:59:46 +05:30
Prianka Liz Kariat
cebfa1cdac Fixed error messages 2023-10-03 23:52:58 +05:30
MediaPipe Team
da8fcb6bb2 Smooth pose landmarks
PiperOrigin-RevId: 570441366
2023-10-03 11:12:46 -07:00
Sebastian Schmidt
a72839ef99 See memory of freed result to nullptr
PiperOrigin-RevId: 570410751
2023-10-03 09:33:48 -07:00
MediaPipe Team
d0183b2c70 Fixes typos in the file mediapipe/python/pybind/image.cc and mediapipe/python/pybind
/image_frame.cc.

PiperOrigin-RevId: 570388388
2023-10-03 08:01:25 -07:00
Prianka Liz Kariat
0ee9b7f86e Added iOS language detector options helpers 2023-10-03 19:21:05 +05:30
Prianka Liz Kariat
38de7493df Added iOS language detector results 2023-10-03 19:20:45 +05:30
Prianka Liz Kariat
3c13e4b6d6 Added iOS language detector options 2023-10-03 19:20:33 +05:30
Prianka Liz Kariat
3067c20955 Added iOS pose landmarker result helpers 2023-10-03 19:18:29 +05:30
Prianka Liz Kariat
c560032a91 Added iOS pose landmarker options 2023-10-03 19:18:06 +05:30
Prianka Liz Kariat
8d5cf33ca4 Added iOS Pose Landmarker Result 2023-10-03 19:17:49 +05:30
Kinar
753ba916a1 Fixed some typos 2023-10-03 01:51:18 -07:00
Kinar
3564fc0d9b Added files for the TextEmbedder C API and tests 2023-10-03 01:48:07 -07:00
MediaPipe Team
5366aa9d0a Internal update
PiperOrigin-RevId: 570204415
2023-10-02 15:55:31 -07:00
Sebastian Schmidt
a00759007d Add error handling to C API
PiperOrigin-RevId: 570094642
2023-10-02 09:49:09 -07:00
Sebastian Schmidt
c7402efe5e Add End to End test for Text Classifier C API
PiperOrigin-RevId: 569658768
2023-09-29 20:53:54 -07:00
Sebastian Schmidt
96fa10b906 Add unit tests for C layer for the input types of Text Classifier
PiperOrigin-RevId: 569553038
2023-09-29 12:08:00 -07:00
Sebastian Schmidt
6915a79e28 Add tests for C API containers
PiperOrigin-RevId: 569526282
2023-09-29 10:27:15 -07:00
Sebastian Schmidt
d4561fb5c2 Do not use full filename when FileLocator decides which asset to load
Fixes https://github.com/google/mediapipe/issues/4819

PiperOrigin-RevId: 569506907
2023-09-29 09:11:01 -07:00
MediaPipe Team
09a51bcdeb Internal change
PiperOrigin-RevId: 569310433
2023-09-28 15:44:37 -07:00
Sebastian Schmidt
5ca1be6f21 Populate the classification result output param instead of a copy
PiperOrigin-RevId: 569293617
2023-09-28 14:38:02 -07:00
MediaPipe Team
f78f24f576 segmentation smoothing stream utility function.
PiperOrigin-RevId: 569283980
2023-09-28 14:03:24 -07:00
Sebastian Schmidt
636cf99a3e Fix memory access issue in C layer
PiperOrigin-RevId: 569279959
2023-09-28 13:50:28 -07:00
MediaPipe Team
e169849041 No public description
PiperOrigin-RevId: 569274219
2023-09-28 13:31:00 -07:00
Sebastian Schmidt
33d6143a1a Don't convert nullptr to std::string in C layer
PiperOrigin-RevId: 569232756
2023-09-28 11:04:44 -07:00
Prianka Liz Kariat
120f82508c Chnaged de allocation method in data provider release callback 2023-09-28 21:43:18 +05:30
Prianka Liz Kariat
8ea805b6f0 Added methods to create iOS MPImage with source type UIImage from a C++ image. 2023-09-28 21:37:03 +05:30
MediaPipe Team
a577dc3043 smoothing stream utility function.
PiperOrigin-RevId: 569074973
2023-09-27 23:19:37 -07:00
MediaPipe Team
9edb4cd753 Introduce LandmarksTransformationCalculator
PiperOrigin-RevId: 569050686
2023-09-27 21:12:19 -07:00
MediaPipe Team
66a279418c tensor_to_joints stream utility function.
PiperOrigin-RevId: 569043195
2023-09-27 20:34:20 -07:00
MediaPipe Team
0ae9ff6b98 Introduce TensorToJointsCalculator
PiperOrigin-RevId: 569040914
2023-09-27 20:27:24 -07:00
MediaPipe Team
da02052c70 landmarks_to_tensor stream utility function.
PiperOrigin-RevId: 569003241
2023-09-27 17:13:46 -07:00
MediaPipe Team
8837b49026 get_vector_item stream utility function.
PiperOrigin-RevId: 568998504
2023-09-27 16:55:43 -07:00
MediaPipe Team
2ecccaf076 concatenate stream utility function.
PiperOrigin-RevId: 568997695
2023-09-27 16:50:48 -07:00
MediaPipe Team
983fda5d4e No public description
PiperOrigin-RevId: 568953918
2023-09-27 14:04:33 -07:00
MediaPipe Team
8f8c66430f Update PackMediaSequenceCalculator to support index feature inputs on the CLIP_MEDIA_ input tag.
For Detection protos representing index features, the `label` field might be empty.

With this change, only the `Detection::score` field is required, and `Detection.label` and `Detection.label_id` are both optional but at least one of them should be set.

PiperOrigin-RevId: 568944596
2023-09-27 13:33:55 -07:00
MediaPipe Team
698b154ff4 Only recreate immutable texture when necessary for Android TensorsToSegmentationCalculator.
PiperOrigin-RevId: 568937611
2023-09-27 13:09:29 -07:00
MediaPipe Team
787371cfba No public description
PiperOrigin-RevId: 568927297
2023-09-27 12:27:57 -07:00
Sebastian Schmidt
61ce228576 Add cc_binary target for C Libraries
PiperOrigin-RevId: 568902427
2023-09-27 11:04:41 -07:00
Sebastian Schmidt
b01ad84c6f Add export declaration to FaceDetector.detect()
PiperOrigin-RevId: 568872459
2023-09-27 09:28:53 -07:00
Sebastian Schmidt
3134625508 No public description
PiperOrigin-RevId: 568660415
2023-09-26 15:14:16 -07:00
Copybara-Service
0417817886 Merge pull request from priankakariatyml:ios-vision-api-name-changes
PiperOrigin-RevId: 568622587
2023-09-26 12:51:31 -07:00
MediaPipe Team
199b42278b Fixes multiple typos in the tasks internal files.
PiperOrigin-RevId: 568585517
2023-09-26 10:45:07 -07:00
Sebastian Schmidt
e5e75eac5e No public description
PiperOrigin-RevId: 568581409
2023-09-26 10:34:47 -07:00
MediaPipe Team
573fdad173 Add export_model_with_tokenizer to Text Classifier API.
PiperOrigin-RevId: 567744604
2023-09-22 16:32:38 -07:00
Chris McClanahan
9d85141227 No public description
PiperOrigin-RevId: 567726262
2023-09-22 15:07:11 -07:00
Prianka Liz Kariat
b1f717e111 Added iOS Image Segmenter tests for methods with completion handlers 2023-09-23 00:15:10 +05:30
Prianka Liz Kariat
2d4e5a75b3 Updated iOS Object Detector Objective C API names 2023-09-23 00:08:43 +05:30
Prianka Liz Kariat
435bee71e8 Updated iOS hand landmarker Objective C API names 2023-09-23 00:08:07 +05:30
Prianka Liz Kariat
9d42744f8a Updated iOS Face Landmarker Objective C API names 2023-09-23 00:07:49 +05:30
Prianka Liz Kariat
d7c57e4eda Updated iOS Face Detector Objective C API names 2023-09-23 00:07:20 +05:30
MediaPipe Team
34cedb980b No public description
PiperOrigin-RevId: 567562548
2023-09-22 02:24:25 -07:00
MediaPipe Team
743118a04a Fixes multiple typos in the tasks internal files.
PiperOrigin-RevId: 567506050
2023-09-21 21:24:45 -07:00
MediaPipe Team
859d90b68b No public description
PiperOrigin-RevId: 567439132
2023-09-21 15:40:19 -07:00
MediaPipe Team
abf0ee892a Internal Changes
PiperOrigin-RevId: 567374962
2023-09-21 11:55:02 -07:00
MediaPipe Team
82d83f2dd8 Update glog to latest commit
PiperOrigin-RevId: 567337708
2023-09-21 10:10:04 -07:00
Mark McDonald
acca31503a
Remove pinned versions from deps 2023-09-21 13:21:45 +08:00
Copybara-Service
19c9d328cb Merge pull request from priankakariatyml:ios-image-segmenter-basic-tests
PiperOrigin-RevId: 567102079
2023-09-20 15:46:14 -07:00
MediaPipe Team
223641a73c Fix depth condition bug when only depth condition is configured.
PiperOrigin-RevId: 567080598
2023-09-20 14:29:18 -07:00
Mark McDonald
308f4f0e73
Adds an empty skeleton project for iOS docgen.
Dependencies tracked in `Podfile` are used to generate reference docs.
2023-09-20 16:53:34 +08:00
MediaPipe Team
41a012721f Fix bug missing SHOW_RESULT in image generator
PiperOrigin-RevId: 566810243
2023-09-19 18:37:35 -07:00
MediaPipe Team
867d5dc5a7 No public description
PiperOrigin-RevId: 566794388
2023-09-19 17:27:55 -07:00
MediaPipe Team
bbf40cba87 split stream utility function.
PiperOrigin-RevId: 566722901
2023-09-19 13:21:39 -07:00
Copybara-Service
58bb2d1b92 Merge pull request from shmishra99:patch-1
PiperOrigin-RevId: 566676755
2023-09-19 10:43:12 -07:00
Sebastian Schmidt
12600e03e9 Do not convert milliseconds to microseconds twice
Fixes https://github.com/google/mediapipe/issues/4809

PiperOrigin-RevId: 566644379
2023-09-19 08:47:31 -07:00
Prianka Liz Kariat
f1a5c8d549 Fixed formatting in MPPImageSegmenter.mm 2023-09-19 20:10:23 +05:30
Prianka Liz Kariat
d9d4016334 Uncommented live stream test in iOS image segmenter tests 2023-09-19 20:06:25 +05:30
Prianka Liz Kariat
bac60548dc Added selfie segmentation and running mode tests to image segmenter 2023-09-19 20:00:41 +05:30
Prianka Liz Kariat
08a5d55ac1 Updated interface of iOS image segmenter 2023-09-19 19:59:33 +05:30
Prianka Liz Kariat
6e80941215 Fixed premature deallocation of C++ masks in iOS Image Segmenter 2023-09-19 19:59:07 +05:30
Kinar
1c40ecf8a5 Added files for Face Stylizer Unit Tests 2023-09-19 03:08:01 -07:00
MediaPipe Team
0ed199186b Fix glScalerCalculator not clearing background in FIT mode
In FIT mode, the image is scaled in an aspect ratio preserving way. That means, the calculator does not render a full-screen quad. The letterbox areas are not drawn. This can cause artifacts, e.g. when MediaPipe reuses the GPU buffer from some other operation. This CL always clears the render target in FIT mode.

PiperOrigin-RevId: 566562715
2023-09-19 02:19:22 -07:00
Shivam Mishra
fd062a2c3f
Remove 'awaiting' labels when user issue/PR updated.
Remove the label "stat:awaiting response", when issue/PR unstale.
2023-09-19 11:28:20 +05:30
MediaPipe Team
94cda40a83 No public description
PiperOrigin-RevId: 566435327
2023-09-18 15:50:10 -07:00
MediaPipe Team
36f78f6e4a threshold stream utility function.
PiperOrigin-RevId: 566417914
2023-09-18 14:44:28 -07:00
MediaPipe Team
58a7790081 detections_to_rects stream utility function.
PiperOrigin-RevId: 566358715
2023-09-18 11:17:27 -07:00
MediaPipe Team
f4477f1739 A minor typo fix in model maker image classifier hyperparameters python docstring.
PiperOrigin-RevId: 566355937
2023-09-18 11:07:58 -07:00
Copybara-Service
d5fa4a157e Merge pull request from priankakariatyml:ios-image-segmenter-basic-tests
PiperOrigin-RevId: 565797057
2023-09-15 15:45:54 -07:00
Copybara-Service
838c89a3ff Merge pull request from priankakariatyml:ios-face-stylizer
PiperOrigin-RevId: 565784200
2023-09-15 14:49:50 -07:00
Sebastian Schmidt
94477b1342 No public description
PiperOrigin-RevId: 565755048
2023-09-15 12:55:00 -07:00
Sebastian Schmidt
a933e324b5 Add export declaration for FaceDetector
Fixes https://github.com/google/mediapipe/issues/4799

PiperOrigin-RevId: 565706741
2023-09-15 10:06:06 -07:00
Sebastian Schmidt
30590fe8d3 Add helper to create Connection array
PiperOrigin-RevId: 565706612
2023-09-15 10:00:28 -07:00
Prianka Liz Kariat
0f511d52d6 Fixed typo in iOS MPPImageSegmenterResult helpers 2023-09-15 14:24:15 +05:30
Prianka Liz Kariat
b3be1418da Updated multiply function in iOS Image Segmenter tests to use C++ vectors 2023-09-15 14:21:33 +05:30
Prianka Liz Kariat
fad7f9cdb4 Added iOS image segmenter basic Objective C tests 2023-09-15 14:18:54 +05:30
Prianka Liz Kariat
d3f7368b27 Added iOS MPPMask test utils 2023-09-15 14:18:23 +05:30
Prianka Liz Kariat
81ec5801ea Added new initializers for iOS MPPImage in test utils 2023-09-15 14:18:10 +05:30
Prianka Liz Kariat
e0b059da58 Added iOS MPPFileInfo for tests 2023-09-15 14:16:50 +05:30
Prianka Liz Kariat
21d000490c Added iOS face stylizer header 2023-09-15 14:06:11 +05:30
Prianka Liz Kariat
bb93b775f4 Added iOS face stylizer options 2023-09-15 14:05:59 +05:30
Prianka Liz Kariat
a259300bfe Added iOS face stylizer result 2023-09-15 14:05:38 +05:30
Sebastian Schmidt
4a8a811373 No public description
PiperOrigin-RevId: 565516592
2023-09-14 17:00:02 -07:00
MediaPipe Team
81964608ba Fixes typo in MediaPipe namespace
PiperOrigin-RevId: 565478366
2023-09-14 14:32:35 -07:00
Daniel Cheng
65e7cd5236 Fix win32 build break in mediapipe.
buffer_aligned_size_ is not used in tflite_support, but is used
in mediapipe.

PiperOrigin-RevId: 565477047
2023-09-14 14:27:05 -07:00
MediaPipe Team
f2b11bf250 No public description
PiperOrigin-RevId: 565446429
2023-09-14 12:41:12 -07:00
806 changed files with 49481 additions and 5235 deletions
.bazelrc
.github/workflows
WORKSPACE
docs/MediaPipeTasksDocGen
MediaPipeTasksDocGen.xcodeproj
project.pbxproj
project.xcworkspace
xcuserdata/macd.xcuserdatad/xcschemes
MediaPipeTasksDocGen
PodfileREADME.md
mediapipe/calculators
audio
core
image
tensor
BUILDaudio_to_tensor_calculator.ccaudio_to_tensor_calculator.protobert_preprocessor_calculator.ccbert_preprocessor_calculator_test.ccimage_to_tensor_calculator.ccimage_to_tensor_calculator_test.ccimage_to_tensor_converter_frame_buffer.ccimage_to_tensor_converter_gl_buffer.ccimage_to_tensor_converter_gl_texture.ccimage_to_tensor_converter_metal.ccimage_to_tensor_converter_opencv.ccimage_to_tensor_utils.ccimage_to_tensor_utils.hinference_calculator_cpu.ccinference_calculator_gl.ccinference_calculator_gl_advanced.ccinference_calculator_metal.ccinference_calculator_xnnpack.ccregex_preprocessor_calculator.ccregex_preprocessor_calculator_test.cctensor_converter_calculator.cctensor_converter_calculator.prototensor_converter_calculator_test.cctensor_converter_cpu.cctensor_converter_cpu.htensor_converter_cpu_test.cctensor_to_joints_calculator.cctensor_to_joints_calculator.htensor_to_joints_calculator.prototensor_to_joints_calculator_test.cctensors_to_classification_calculator.cctensors_to_classification_calculator.prototensors_to_detections_calculator.cctensors_to_detections_calculator.prototensors_to_landmarks_calculator.cctensors_to_segmentation_calculator.cctensors_to_segmentation_calculator_test.cctensors_to_segmentation_calculator_test_utils.cctensors_to_segmentation_calculator_test_utils.htensors_to_segmentation_calculator_test_utils_test.cctensors_to_segmentation_converter.htensors_to_segmentation_converter_opencv.cctensors_to_segmentation_converter_opencv.htensors_to_segmentation_utils.cctensors_to_segmentation_utils.htensors_to_segmentation_utils_test.ccuniversal_sentence_encoder_preprocessor_calculator_test.cc
tensorflow
tflite

View File

@ -98,6 +98,9 @@ build:darwin_arm64 --apple_platform_type=macos
build:darwin_arm64 --macos_minimum_os=10.16
build:darwin_arm64 --cpu=darwin_arm64
# Turn off maximum stdout size
build --experimental_ui_max_stdouterr_bytes=-1
# This bazelrc file is meant to be written by a setup script.
try-import %workspace%/.configure.bazelrc

View File

@ -39,7 +39,9 @@ jobs:
# Limit the No. of API calls in one run default value is 30.
operations-per-run: 500
# Prevent to remove stale label when PRs or issues are updated.
remove-stale-when-updated: false
remove-stale-when-updated: true
# List of labels to remove when issues/PRs unstale.
labels-to-remove-when-unstale: 'stat:awaiting response'
# comment on issue if not active for more then 7 days.
stale-issue-message: 'This issue has been marked stale because it has no recent activity since 7 days. It will be closed if no further activity occurs. Thank you.'
# comment on PR if not active for more then 14 days.

View File

@ -154,19 +154,19 @@ http_archive(
# 2020-08-21
http_archive(
name = "com_github_glog_glog",
strip_prefix = "glog-3a0d4d22c5ae0b9a2216988411cfa6bf860cc372",
sha256 = "170d08f80210b82d95563f4723a15095eff1aad1863000e8eeb569c96a98fefb",
strip_prefix = "glog-0.6.0",
sha256 = "8a83bf982f37bb70825df71a9709fa90ea9f4447fb3c099e1d720a439d88bad6",
urls = [
"https://github.com/google/glog/archive/3a0d4d22c5ae0b9a2216988411cfa6bf860cc372.zip",
"https://github.com/google/glog/archive/v0.6.0.tar.gz",
],
)
http_archive(
name = "com_github_glog_glog_no_gflags",
strip_prefix = "glog-3a0d4d22c5ae0b9a2216988411cfa6bf860cc372",
sha256 = "170d08f80210b82d95563f4723a15095eff1aad1863000e8eeb569c96a98fefb",
strip_prefix = "glog-0.6.0",
sha256 = "8a83bf982f37bb70825df71a9709fa90ea9f4447fb3c099e1d720a439d88bad6",
build_file = "@//third_party:glog_no_gflags.BUILD",
urls = [
"https://github.com/google/glog/archive/3a0d4d22c5ae0b9a2216988411cfa6bf860cc372.zip",
"https://github.com/google/glog/archive/v0.6.0.tar.gz",
],
patches = [
"@//third_party:com_github_glog_glog.diff",
@ -176,6 +176,25 @@ http_archive(
],
)
# 2023-06-05
# This version of Glog is required for Windows support, but currently causes
# crashes on some Android devices.
http_archive(
name = "com_github_glog_glog_windows",
strip_prefix = "glog-3a0d4d22c5ae0b9a2216988411cfa6bf860cc372",
sha256 = "170d08f80210b82d95563f4723a15095eff1aad1863000e8eeb569c96a98fefb",
urls = [
"https://github.com/google/glog/archive/3a0d4d22c5ae0b9a2216988411cfa6bf860cc372.zip",
],
patches = [
"@//third_party:com_github_glog_glog.diff",
"@//third_party:com_github_glog_glog_windows_patch.diff",
],
patch_args = [
"-p1",
],
)
# easyexif
http_archive(
name = "easyexif",
@ -225,16 +244,14 @@ http_archive(
# sentencepiece
http_archive(
name = "com_google_sentencepiece",
strip_prefix = "sentencepiece-1.0.0",
sha256 = "c05901f30a1d0ed64cbcf40eba08e48894e1b0e985777217b7c9036cac631346",
strip_prefix = "sentencepiece-0.1.96",
sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
urls = [
"https://github.com/google/sentencepiece/archive/1.0.0.zip",
],
patches = [
"@//third_party:com_google_sentencepiece_no_gflag_no_gtest.diff",
"https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip"
],
build_file = "@//third_party:sentencepiece.BUILD",
patches = ["@//third_party:com_google_sentencepiece.diff"],
patch_args = ["-p1"],
repo_mapping = {"@com_google_glog" : "@com_github_glog_glog_no_gflags"},
)
http_archive(
@ -496,6 +513,9 @@ http_archive(
"@//third_party:org_tensorflow_system_python.diff",
# Diff is generated with a script, don't update it manually.
"@//third_party:org_tensorflow_custom_ops.diff",
# Works around Bazel issue with objc_library.
# See https://github.com/bazelbuild/bazel/issues/19912
"@//third_party:org_tensorflow_objc_build_fixes.diff",
],
patch_args = [
"-p1",

View File

@ -0,0 +1,342 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 56;
objects = {
/* Begin PBXBuildFile section */
8566B55D2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h in Headers */ = {isa = PBXBuildFile; fileRef = 8566B55C2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h */; settings = {ATTRIBUTES = (Public, ); }; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
8566B5592ABABF9A00AAB22A /* MediaPipeTasksDocGen.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = MediaPipeTasksDocGen.framework; sourceTree = BUILT_PRODUCTS_DIR; };
8566B55C2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MediaPipeTasksDocGen.h; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
8566B5562ABABF9A00AAB22A /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
8566B54F2ABABF9A00AAB22A = {
isa = PBXGroup;
children = (
8566B55B2ABABF9A00AAB22A /* MediaPipeTasksDocGen */,
8566B55A2ABABF9A00AAB22A /* Products */,
);
sourceTree = "<group>";
};
8566B55A2ABABF9A00AAB22A /* Products */ = {
isa = PBXGroup;
children = (
8566B5592ABABF9A00AAB22A /* MediaPipeTasksDocGen.framework */,
);
name = Products;
sourceTree = "<group>";
};
8566B55B2ABABF9A00AAB22A /* MediaPipeTasksDocGen */ = {
isa = PBXGroup;
children = (
8566B55C2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h */,
);
path = MediaPipeTasksDocGen;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
8566B5542ABABF9A00AAB22A /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
8566B55D2ABABF9A00AAB22A /* MediaPipeTasksDocGen.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
8566B5582ABABF9A00AAB22A /* MediaPipeTasksDocGen */ = {
isa = PBXNativeTarget;
buildConfigurationList = 8566B5602ABABF9A00AAB22A /* Build configuration list for PBXNativeTarget "MediaPipeTasksDocGen" */;
buildPhases = (
8566B5542ABABF9A00AAB22A /* Headers */,
8566B5552ABABF9A00AAB22A /* Sources */,
8566B5562ABABF9A00AAB22A /* Frameworks */,
8566B5572ABABF9A00AAB22A /* Resources */,
);
buildRules = (
);
dependencies = (
);
name = MediaPipeTasksDocGen;
productName = MediaPipeTasksDocGen;
productReference = 8566B5592ABABF9A00AAB22A /* MediaPipeTasksDocGen.framework */;
productType = "com.apple.product-type.framework";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
8566B5502ABABF9A00AAB22A /* Project object */ = {
isa = PBXProject;
attributes = {
BuildIndependentTargetsInParallel = 1;
LastUpgradeCheck = 1430;
TargetAttributes = {
8566B5582ABABF9A00AAB22A = {
CreatedOnToolsVersion = 14.3.1;
};
};
};
buildConfigurationList = 8566B5532ABABF9A00AAB22A /* Build configuration list for PBXProject "MediaPipeTasksDocGen" */;
compatibilityVersion = "Xcode 14.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = 8566B54F2ABABF9A00AAB22A;
productRefGroup = 8566B55A2ABABF9A00AAB22A /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
8566B5582ABABF9A00AAB22A /* MediaPipeTasksDocGen */,
);
};
/* End PBXProject section */
/* Begin PBXResourcesBuildPhase section */
8566B5572ABABF9A00AAB22A /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
8566B5552ABABF9A00AAB22A /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
8566B55E2ABABF9A00AAB22A /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.4;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
VERSIONING_SYSTEM = "apple-generic";
VERSION_INFO_PREFIX = "";
};
name = Debug;
};
8566B55F2ABABF9A00AAB22A /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
CURRENT_PROJECT_VERSION = 1;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.4;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = iphoneos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
VERSIONING_SYSTEM = "apple-generic";
VERSION_INFO_PREFIX = "";
};
name = Release;
};
8566B5612ABABF9A00AAB22A /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEFINES_MODULE = YES;
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_MODULE_VERIFIER = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
"@loader_path/Frameworks",
);
MARKETING_VERSION = 1.0;
MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++";
MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu11 gnu++20";
PRODUCT_BUNDLE_IDENTIFIER = com.google.mediapipe.MediaPipeTasksDocGen;
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
SKIP_INSTALL = YES;
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
8566B5622ABABF9A00AAB22A /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEFINES_MODULE = YES;
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_MODULE_VERIFIER = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
"@loader_path/Frameworks",
);
MARKETING_VERSION = 1.0;
MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++";
MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu11 gnu++20";
PRODUCT_BUNDLE_IDENTIFIER = com.google.mediapipe.MediaPipeTasksDocGen;
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
SKIP_INSTALL = YES;
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
8566B5532ABABF9A00AAB22A /* Build configuration list for PBXProject "MediaPipeTasksDocGen" */ = {
isa = XCConfigurationList;
buildConfigurations = (
8566B55E2ABABF9A00AAB22A /* Debug */,
8566B55F2ABABF9A00AAB22A /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
8566B5602ABABF9A00AAB22A /* Build configuration list for PBXNativeTarget "MediaPipeTasksDocGen" */ = {
isa = XCConfigurationList;
buildConfigurations = (
8566B5612ABABF9A00AAB22A /* Debug */,
8566B5622ABABF9A00AAB22A /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 8566B5502ABABF9A00AAB22A /* Project object */;
}

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:">
</FileRef>
</Workspace>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>

View File

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>MediaPipeTasksDocGen.xcscheme_^#shared#^_</key>
<dict>
<key>orderHint</key>
<integer>0</integer>
</dict>
</dict>
</dict>
</plist>

View File

@ -0,0 +1,17 @@
//
// MediaPipeTasksDocGen.h
// MediaPipeTasksDocGen
//
// Created by Mark McDonald on 20/9/2023.
//
#import <Foundation/Foundation.h>
//! Project version number for MediaPipeTasksDocGen.
FOUNDATION_EXPORT double MediaPipeTasksDocGenVersionNumber;
//! Project version string for MediaPipeTasksDocGen.
FOUNDATION_EXPORT const unsigned char MediaPipeTasksDocGenVersionString[];
// In this header, you should import all the public headers of your framework using statements like
// #import <MediaPipeTasksDocGen/PublicHeader.h>

View File

@ -0,0 +1,11 @@
# Uncomment the next line to define a global platform for your project
platform :ios, '15.0'
target 'MediaPipeTasksDocGen' do
# Comment the next line if you don't want to use dynamic frameworks
use_frameworks!
# Pods for MediaPipeTasksDocGen
pod 'MediaPipeTasksText'
pod 'MediaPipeTasksVision'
end

View File

@ -0,0 +1,9 @@
# MediaPipeTasksDocGen
This empty project is used to generate reference documentation for the
Objective-C and Swift libraries.
Docs are generated using [Jazzy](https://github.com/realm/jazzy) and published
to [the developer site](https://developers.google.com/mediapipe/solutions/).
To bump the API version used, edit [`Podfile`](./Podfile).

View File

@ -80,7 +80,7 @@ message SpectrogramCalculatorOptions {
// If use_local_timestamp is true, the output packet's timestamp is based on
// the last sample of the packet and it's inferred from the latest input
// packet's timestamp. If false, the output packet's timestamp is based on
// the cumulative timestamping, which is inferred from the intial input
// the cumulative timestamping, which is inferred from the initial input
// timestamp and the cumulative number of samples.
optional bool use_local_timestamp = 8 [default = false];
}

View File

@ -66,7 +66,7 @@ message TimeSeriesFramerCalculatorOptions {
// If use_local_timestamp is true, the output packet's timestamp is based on
// the last sample of the packet and it's inferred from the latest input
// packet's timestamp. If false, the output packet's timestamp is based on
// the cumulative timestamping, which is inferred from the intial input
// the cumulative timestamping, which is inferred from the initial input
// timestamp and the cumulative number of samples.
optional bool use_local_timestamp = 6 [default = false];
}

View File

@ -21,10 +21,10 @@ licenses(["notice"])
package(default_visibility = ["//visibility:public"])
selects.config_setting_group(
name = "ios_or_disable_gpu",
name = "apple_or_disable_gpu",
match_any = [
"//mediapipe/gpu:disable_gpu",
"//mediapipe:ios",
"//mediapipe:apple",
],
)
@ -299,7 +299,7 @@ cc_library(
"//mediapipe/util:render_data_cc_proto",
"@org_tensorflow//tensorflow/lite:framework",
] + select({
":ios_or_disable_gpu": [],
":apple_or_disable_gpu": [],
"//conditions:default": [
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
],
@ -325,6 +325,7 @@ cc_library(
":concatenate_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:body_rig_cc_proto",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
@ -726,6 +727,7 @@ cc_library(
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
],
alwayslink = 1,
)
@ -741,6 +743,7 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"//mediapipe/framework/tool:options_util",
"//mediapipe/util:packet_test_util",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
@ -912,7 +915,7 @@ cc_library(
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
] + select({
":ios_or_disable_gpu": [],
":apple_or_disable_gpu": [],
"//conditions:default": [
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
],
@ -944,6 +947,7 @@ cc_library(
deps = [
":split_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:body_rig_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
@ -1389,3 +1393,26 @@ cc_test(
"@com_google_absl//absl/types:optional",
],
)
cc_library(
name = "value_or_default_calculator",
srcs = ["value_or_default_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:status",
],
alwayslink = True,
)
cc_test(
name = "value_or_default_calculator_test",
srcs = ["value_or_default_calculator_test.cc"],
deps = [
":value_or_default_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework:packet",
"//mediapipe/framework/port:gtest_main",
],
)

View File

@ -13,6 +13,7 @@
// limitations under the License.
#include <string>
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
@ -163,6 +164,75 @@ TEST_F(BeginEndLoopCalculatorGraphTest, MultipleVectors) {
PacketOfIntsEq(input_timestamp2, std::vector<int>{3, 4})));
}
TEST(BeginEndLoopCalculatorPossibleDataRaceTest,
EndLoopForIntegersDoesNotRace) {
auto graph_config = ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
num_threads: 4
input_stream: "ints"
node {
calculator: "BeginLoopIntegerCalculator"
input_stream: "ITERABLE:ints"
output_stream: "ITEM:int"
output_stream: "BATCH_END:timestamp"
}
node {
calculator: "IncrementCalculator"
input_stream: "int"
output_stream: "int_plus_one"
}
# BEGIN: Data race possibility
# EndLoop###Calculator and another calculator using the same input
# may introduce race due to EndLoop###Calculator possibly consuming
# packet.
node {
calculator: "EndLoopIntegersCalculator"
input_stream: "ITEM:int_plus_one"
input_stream: "BATCH_END:timestamp"
output_stream: "ITERABLE:ints_plus_one"
}
node {
calculator: "IncrementCalculator"
input_stream: "int_plus_one"
output_stream: "int_plus_two"
}
# END: Data race possibility
node {
calculator: "EndLoopIntegersCalculator"
input_stream: "ITEM:int_plus_two"
input_stream: "BATCH_END:timestamp"
output_stream: "ITERABLE:ints_plus_two"
}
)pb");
std::vector<Packet> int_plus_one_packets;
tool::AddVectorSink("ints_plus_one", &graph_config, &int_plus_one_packets);
std::vector<Packet> int_original_packets;
tool::AddVectorSink("ints_plus_two", &graph_config, &int_original_packets);
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
for (int i = 0; i < 100; ++i) {
std::vector<int> ints = {i, i + 1, i + 2};
Timestamp ts = Timestamp(i);
MP_ASSERT_OK(graph.AddPacketToInputStream(
"ints", MakePacket<std::vector<int>>(std::move(ints)).At(ts)));
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_THAT(int_plus_one_packets,
testing::ElementsAre(
PacketOfIntsEq(ts, std::vector<int>{i + 1, i + 2, i + 3})));
EXPECT_THAT(int_original_packets,
testing::ElementsAre(
PacketOfIntsEq(ts, std::vector<int>{i + 2, i + 3, i + 4})));
int_plus_one_packets.clear();
int_original_packets.clear();
}
MP_ASSERT_OK(graph.CloseAllPacketSources());
MP_ASSERT_OK(graph.WaitUntilDone());
}
// Passes non empty vector through or outputs empty vector in case of timestamp
// bound update.
class PassThroughOrEmptyVectorCalculator : public CalculatorBase {

View File

@ -92,7 +92,7 @@ class BypassCalculator : public Node {
auto options = cc->Options<BypassCalculatorOptions>();
RET_CHECK_EQ(options.pass_input_stream().size(),
options.pass_output_stream().size());
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
auto pass_streams,
GetPassMap(options, *cc->Inputs().TagMap(), *cc->Outputs().TagMap()));
std::set<CollectionItemId> pass_out;
@ -121,8 +121,9 @@ class BypassCalculator : public Node {
// Saves the map of passthrough input and output stream ids.
absl::Status Open(CalculatorContext* cc) override {
auto options = cc->Options<BypassCalculatorOptions>();
ASSIGN_OR_RETURN(pass_streams_, GetPassMap(options, *cc->Inputs().TagMap(),
*cc->Outputs().TagMap()));
MP_ASSIGN_OR_RETURN(
pass_streams_,
GetPassMap(options, *cc->Inputs().TagMap(), *cc->Outputs().TagMap()));
return absl::OkStatus();
}

View File

@ -18,6 +18,7 @@
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/body_rig.pb.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
@ -128,6 +129,19 @@ class ConcatenateClassificationListCalculator
};
MEDIAPIPE_REGISTER_NODE(ConcatenateClassificationListCalculator);
class ConcatenateJointListCalculator
: public ConcatenateListsCalculator<Joint, JointList> {
protected:
int ListSize(const JointList& list) const override {
return list.joint_size();
}
const Joint GetItem(const JointList& list, int idx) const override {
return list.joint(idx);
}
Joint* AddItem(JointList& list) const override { return list.add_joint(); }
};
MEDIAPIPE_REGISTER_NODE(ConcatenateJointListCalculator);
} // namespace api2
} // namespace mediapipe

View File

@ -55,16 +55,16 @@ class EndLoopCalculator : public CalculatorBase {
if (!input_stream_collection_) {
input_stream_collection_.reset(new IterableT);
}
// Try to consume the item and move it into the collection. If the items
// are not consumable, then try to copy them instead. If the items are
// not copyable, then an error will be returned.
auto item_ptr_or = cc->Inputs().Tag("ITEM").Value().Consume<ItemT>();
if (item_ptr_or.ok()) {
input_stream_collection_->push_back(std::move(*item_ptr_or.value()));
if constexpr (std::is_copy_constructible_v<ItemT>) {
input_stream_collection_->push_back(
cc->Inputs().Tag("ITEM").Get<ItemT>());
} else {
if constexpr (std::is_copy_constructible_v<ItemT>) {
input_stream_collection_->push_back(
cc->Inputs().Tag("ITEM").template Get<ItemT>());
// Try to consume the item and move it into the collection. Return an
// error if the items are not consumable.
auto item_ptr_or = cc->Inputs().Tag("ITEM").Value().Consume<ItemT>();
if (item_ptr_or.ok()) {
input_stream_collection_->push_back(std::move(*item_ptr_or.value()));
} else {
return absl::InternalError(
"The item type is not copiable. Consider making the "

View File

@ -71,7 +71,7 @@ TEST_F(PacketSequencerCalculatorTest, IsRegistered) {
CalculatorBaseRegistry::IsRegistered("PacketSequencerCalculator"));
}
// Shows how control packets recieve timestamps before and after frame packets
// Shows how control packets receive timestamps before and after frame packets
// have arrived.
TEST_F(PacketSequencerCalculatorTest, ChannelEarly) {
CalculatorGraphConfig::Node node_config = BuildNodeConfig();

View File

@ -17,6 +17,7 @@
#include <set>
#include <string>
#include "absl/status/status.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
@ -32,6 +33,7 @@ namespace {
constexpr char kTagAtPreStream[] = "AT_PRESTREAM";
constexpr char kTagAtPostStream[] = "AT_POSTSTREAM";
constexpr char kTagAtZero[] = "AT_ZERO";
constexpr char kTagAtFirstTick[] = "AT_FIRST_TICK";
constexpr char kTagAtTick[] = "AT_TICK";
constexpr char kTagTick[] = "TICK";
constexpr char kTagAtTimestamp[] = "AT_TIMESTAMP";
@ -43,6 +45,7 @@ static std::map<std::string, Timestamp>* kTimestampMap = []() {
res->emplace(kTagAtPostStream, Timestamp::PostStream());
res->emplace(kTagAtZero, Timestamp(0));
res->emplace(kTagAtTick, Timestamp::Unset());
res->emplace(kTagAtFirstTick, Timestamp::Unset());
res->emplace(kTagAtTimestamp, Timestamp::Unset());
return res;
}();
@ -59,8 +62,8 @@ std::string GetOutputTag(const CC& cc) {
// timestamp, depending on the tag used to define output stream(s). (One tag can
// be used only.)
//
// Valid tags are AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, AT_TIMESTAMP
// and corresponding timestamps are Timestamp::PreStream(),
// Valid tags are AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, AT_FIRST_TICK,
// AT_TIMESTAMP and corresponding timestamps are Timestamp::PreStream(),
// Timestamp::PostStream(), Timestamp(0), timestamp of a packet received in TICK
// input, and timestamp received from a side input.
//
@ -96,6 +99,7 @@ class SidePacketToStreamCalculator : public CalculatorBase {
private:
bool is_tick_processing_ = false;
bool close_on_first_tick_ = false;
std::string output_tag_;
};
REGISTER_CALCULATOR(SidePacketToStreamCalculator);
@ -103,13 +107,16 @@ REGISTER_CALCULATOR(SidePacketToStreamCalculator);
absl::Status SidePacketToStreamCalculator::GetContract(CalculatorContract* cc) {
const auto& tags = cc->Outputs().GetTags();
RET_CHECK(tags.size() == 1 && kTimestampMap->count(*tags.begin()) == 1)
<< "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and "
"AT_TIMESTAMP tags is allowed and required to specify output "
"stream(s).";
RET_CHECK(
(cc->Outputs().HasTag(kTagAtTick) && cc->Inputs().HasTag(kTagTick)) ||
(!cc->Outputs().HasTag(kTagAtTick) && !cc->Inputs().HasTag(kTagTick)))
<< "Either both of TICK and AT_TICK should be used or none of them.";
<< "Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK, "
"AT_FIRST_TICK and AT_TIMESTAMP tags is allowed and required to "
"specify output stream(s).";
const bool has_tick_output =
cc->Outputs().HasTag(kTagAtTick) || cc->Outputs().HasTag(kTagAtFirstTick);
const bool has_tick_input = cc->Inputs().HasTag(kTagTick);
RET_CHECK((has_tick_output && has_tick_input) ||
(!has_tick_output && !has_tick_input))
<< "Either both TICK input and tick (AT_TICK/AT_FIRST_TICK) output "
"should be used or none of them.";
RET_CHECK((cc->Outputs().HasTag(kTagAtTimestamp) &&
cc->InputSidePackets().HasTag(kTagSideInputTimestamp)) ||
(!cc->Outputs().HasTag(kTagAtTimestamp) &&
@ -148,11 +155,17 @@ absl::Status SidePacketToStreamCalculator::Open(CalculatorContext* cc) {
// timestamp bound update.
cc->SetOffset(TimestampDiff(0));
}
if (output_tag_ == kTagAtFirstTick) {
close_on_first_tick_ = true;
}
return absl::OkStatus();
}
absl::Status SidePacketToStreamCalculator::Process(CalculatorContext* cc) {
if (is_tick_processing_) {
if (cc->Outputs().Get(output_tag_, 0).IsClosed()) {
return absl::OkStatus();
}
// TICK input is guaranteed to be non-empty, as it's the only input stream
// for this calculator.
const auto& timestamp = cc->Inputs().Tag(kTagTick).Value().Timestamp();
@ -160,6 +173,9 @@ absl::Status SidePacketToStreamCalculator::Process(CalculatorContext* cc) {
cc->Outputs()
.Get(output_tag_, i)
.AddPacket(cc->InputSidePackets().Index(i).At(timestamp));
if (close_on_first_tick_) {
cc->Outputs().Get(output_tag_, i).Close();
}
}
return absl::OkStatus();
@ -170,6 +186,7 @@ absl::Status SidePacketToStreamCalculator::Process(CalculatorContext* cc) {
absl::Status SidePacketToStreamCalculator::Close(CalculatorContext* cc) {
if (!cc->Outputs().HasTag(kTagAtTick) &&
!cc->Outputs().HasTag(kTagAtFirstTick) &&
!cc->Outputs().HasTag(kTagAtTimestamp)) {
const auto& timestamp = kTimestampMap->at(output_tag_);
for (int i = 0; i < cc->Outputs().NumEntries(output_tag_); ++i) {

View File

@ -27,13 +27,17 @@
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/tool/options_util.h"
#include "mediapipe/util/packet_test_util.h"
namespace mediapipe {
namespace {
using testing::HasSubstr;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
TEST(SidePacketToStreamCalculator, WrongConfig_MissingTick) {
TEST(SidePacketToStreamCalculator, WrongConfigWithMissingTick) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -52,10 +56,35 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MissingTick) {
EXPECT_THAT(
status.message(),
HasSubstr(
"Either both of TICK and AT_TICK should be used or none of them."));
"Either both TICK input and tick (AT_TICK/AT_FIRST_TICK) output "
"should be used or none of them."));
}
TEST(SidePacketToStreamCalculator, WrongConfig_MissingTimestampSideInput) {
TEST(SidePacketToStreamCalculator,
WrongConfigWithMissingTickForFirstTickProcessing) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
input_stream: "tick"
input_side_packet: "side_packet"
output_stream: "packet"
node {
calculator: "SidePacketToStreamCalculator"
input_side_packet: "side_packet"
output_stream: "AT_FIRST_TICK:packet"
}
)pb");
CalculatorGraph graph;
auto status = graph.Initialize(graph_config);
EXPECT_FALSE(status.ok());
EXPECT_THAT(
status.message(),
HasSubstr(
"Either both TICK input and tick (AT_TICK/AT_FIRST_TICK) output "
"should be used or none of them."));
}
TEST(SidePacketToStreamCalculator, WrongConfigWithMissingTimestampSideInput) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -76,7 +105,7 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MissingTimestampSideInput) {
"or none of them."));
}
TEST(SidePacketToStreamCalculator, WrongConfig_NonExistentTag) {
TEST(SidePacketToStreamCalculator, WrongConfigWithNonExistentTag) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -92,14 +121,13 @@ TEST(SidePacketToStreamCalculator, WrongConfig_NonExistentTag) {
CalculatorGraph graph;
auto status = graph.Initialize(graph_config);
EXPECT_FALSE(status.ok());
EXPECT_THAT(
status.message(),
HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and "
"AT_TIMESTAMP tags is allowed and required to specify output "
"stream(s)."));
EXPECT_THAT(status.message(),
HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, "
"AT_TICK, AT_FIRST_TICK and AT_TIMESTAMP tags is "
"allowed and required to specify output stream(s)."));
}
TEST(SidePacketToStreamCalculator, WrongConfig_MixedTags) {
TEST(SidePacketToStreamCalculator, WrongConfigWithMixedTags) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -117,14 +145,13 @@ TEST(SidePacketToStreamCalculator, WrongConfig_MixedTags) {
CalculatorGraph graph;
auto status = graph.Initialize(graph_config);
EXPECT_FALSE(status.ok());
EXPECT_THAT(
status.message(),
HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, AT_TICK and "
"AT_TIMESTAMP tags is allowed and required to specify output "
"stream(s)."));
EXPECT_THAT(status.message(),
HasSubstr("Only one of AT_PRESTREAM, AT_POSTSTREAM, AT_ZERO, "
"AT_TICK, AT_FIRST_TICK and AT_TIMESTAMP tags is "
"allowed and required to specify output stream(s)."));
}
TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughSidePackets) {
TEST(SidePacketToStreamCalculator, WrongConfigWithNotEnoughSidePackets) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -146,7 +173,7 @@ TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughSidePackets) {
"Same number of input side packets and output streams is required."));
}
TEST(SidePacketToStreamCalculator, WrongConfig_NotEnoughOutputStreams) {
TEST(SidePacketToStreamCalculator, WrongConfigWithNotEnoughOutputStreams) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -248,7 +275,50 @@ TEST(SidePacketToStreamCalculator, AtTick) {
tick_and_verify(/*at_timestamp=*/1025);
}
TEST(SidePacketToStreamCalculator, AtTick_MultipleSidePackets) {
TEST(SidePacketToStreamCalculator, AtFirstTick) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
input_stream: "tick"
input_side_packet: "side_packet"
output_stream: "packet"
node {
calculator: "SidePacketToStreamCalculator"
input_stream: "TICK:tick"
input_side_packet: "side_packet"
output_stream: "AT_FIRST_TICK:packet"
}
)pb");
std::vector<Packet> output_packets;
tool::AddVectorSink("packet", &graph_config, &output_packets);
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
const int expected_value = 20;
const Timestamp kTestTimestamp(1234);
MP_ASSERT_OK(
graph.StartRun({{"side_packet", MakePacket<int>(expected_value)}}));
auto insert_tick = [&graph](Timestamp at_timestamp) {
MP_ASSERT_OK(graph.AddPacketToInputStream(
"tick", MakePacket<int>(/*doesn't matter*/ 1).At(at_timestamp)));
MP_ASSERT_OK(graph.WaitUntilIdle());
};
insert_tick(kTestTimestamp);
EXPECT_THAT(output_packets,
ElementsAre(PacketContainsTimestampAndPayload<int>(
Eq(kTestTimestamp), Eq(expected_value))));
output_packets.clear();
// Should not result in an additional output.
insert_tick(kTestTimestamp + 1);
EXPECT_THAT(output_packets, IsEmpty());
}
TEST(SidePacketToStreamCalculator, AtTickWithMultipleSidePackets) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
@ -302,6 +372,62 @@ TEST(SidePacketToStreamCalculator, AtTick_MultipleSidePackets) {
tick_and_verify(/*at_timestamp=*/1025);
}
TEST(SidePacketToStreamCalculator, AtFirstTickWithMultipleSidePackets) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
input_stream: "tick"
input_side_packet: "side_packet0"
input_side_packet: "side_packet1"
output_stream: "packet0"
output_stream: "packet1"
node {
calculator: "SidePacketToStreamCalculator"
input_stream: "TICK:tick"
input_side_packet: "side_packet0"
input_side_packet: "side_packet1"
output_stream: "AT_FIRST_TICK:0:packet0"
output_stream: "AT_FIRST_TICK:1:packet1"
}
)pb");
std::vector<Packet> output_packets0;
tool::AddVectorSink("packet0", &graph_config, &output_packets0);
std::vector<Packet> output_packets1;
tool::AddVectorSink("packet1", &graph_config, &output_packets1);
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
const int expected_value0 = 20;
const int expected_value1 = 128;
const Timestamp kTestTimestamp(1234);
MP_ASSERT_OK(
graph.StartRun({{"side_packet0", MakePacket<int>(expected_value0)},
{"side_packet1", MakePacket<int>(expected_value1)}}));
auto insert_tick = [&graph](Timestamp at_timestamp) {
MP_ASSERT_OK(graph.AddPacketToInputStream(
"tick", MakePacket<int>(/*doesn't matter*/ 1).At(at_timestamp)));
MP_ASSERT_OK(graph.WaitUntilIdle());
};
insert_tick(kTestTimestamp);
EXPECT_THAT(output_packets0,
ElementsAre(PacketContainsTimestampAndPayload<int>(
Eq(kTestTimestamp), Eq(expected_value0))));
EXPECT_THAT(output_packets1,
ElementsAre(PacketContainsTimestampAndPayload<int>(
Eq(kTestTimestamp), Eq(expected_value1))));
output_packets0.clear();
output_packets1.clear();
// Should not result in an additional output.
insert_tick(kTestTimestamp + 1);
EXPECT_THAT(output_packets0, IsEmpty());
EXPECT_THAT(output_packets1, IsEmpty());
}
TEST(SidePacketToStreamCalculator, AtTimestamp) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
@ -334,7 +460,7 @@ TEST(SidePacketToStreamCalculator, AtTimestamp) {
EXPECT_EQ(expected_value, output_packets.back().Get<int>());
}
TEST(SidePacketToStreamCalculator, AtTimestamp_MultipleOutputs) {
TEST(SidePacketToStreamCalculator, AtTimestampWithMultipleOutputs) {
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(

View File

@ -17,6 +17,7 @@
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/body_rig.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
@ -196,6 +197,18 @@ class SplitLandmarkListCalculator
};
REGISTER_CALCULATOR(SplitLandmarkListCalculator);
class SplitJointListCalculator : public SplitListsCalculator<Joint, JointList> {
protected:
int ListSize(const JointList& list) const override {
return list.joint_size();
}
const Joint GetItem(const JointList& list, int idx) const override {
return list.joint(idx);
}
Joint* AddItem(JointList& list) const override { return list.add_joint(); }
};
REGISTER_CALCULATOR(SplitJointListCalculator);
} // namespace mediapipe
// NOLINTNEXTLINE

View File

@ -0,0 +1,90 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {
constexpr char kInputValueTag[] = "IN";
constexpr char kTickerTag[] = "TICK";
constexpr char kOutputTag[] = "OUT";
constexpr char kIndicationTag[] = "FLAG";
} // namespace
// For every packet received on the TICK stream, if the IN stream is not
// empty - emit its value as is as OUT. Otherwise output a default packet.
// FLAG outputs true every time the default value has been used. It does not
// output anything when IN has a value.
//
// Example config:
// node {
// calculator: "ValueOrDefaultCalculator"
// input_stream: "IN:sometimes_missing_value"
// input_stream: "TICK:clock"
// output_stream: "OUT:value_or_default"
// output_stream: "FLAG:used_default"
// input_side_packet: "default"
// }
//
// TODO: Consider adding an option for providing the default value as an
// input stream instead of a side packet, which would enable using standard
// calculators instead of creating new packet generators. It would also
// allow a dynamic default value.
class ValueOrDefaultCalculator : public mediapipe::CalculatorBase {
 public:
  ValueOrDefaultCalculator() {}

  // The calculator holds a Packet member; copying would alias it confusingly.
  ValueOrDefaultCalculator(const ValueOrDefaultCalculator&) = delete;
  ValueOrDefaultCalculator& operator=(const ValueOrDefaultCalculator&) = delete;

  static mediapipe::Status GetContract(mediapipe::CalculatorContract* cc) {
    // IN carries the (possibly missing) value; TICK only drives output rate,
    // so both accept any type.
    cc->Inputs().Tag(kInputValueTag).SetAny();
    cc->Inputs().Tag(kTickerTag).SetAny();
    // OUT mirrors IN's type, and the default side packet must match it too.
    cc->Outputs().Tag(kOutputTag).SetSameAs(&cc->Inputs().Tag(kInputValueTag));
    cc->Outputs().Tag(kIndicationTag).Set<bool>();
    cc->InputSidePackets().Index(0).SetSameAs(
        &cc->Inputs().Tag(kInputValueTag));
    return mediapipe::OkStatus();
  }

  mediapipe::Status Open(mediapipe::CalculatorContext* cc) override {
    // Forward the IN stream header (if any) so downstream consumers see it.
    if (!cc->Inputs().Tag(kInputValueTag).Header().IsEmpty()) {
      cc->Outputs()
          .Tag(kOutputTag)
          .SetHeader(cc->Inputs().Tag(kInputValueTag).Header());
    }
    default_ = cc->InputSidePackets().Index(0);
    // Output timestamps always equal the triggering input timestamp.
    cc->SetOffset(mediapipe::TimestampDiff(0));
    return mediapipe::OkStatus();
  }

  mediapipe::Status Process(mediapipe::CalculatorContext* cc) override {
    // Output according to the TICK signal: no tick, no output.
    if (cc->Inputs().Tag(kTickerTag).IsEmpty()) {
      return mediapipe::OkStatus();
    }
    if (!cc->Inputs().Tag(kInputValueTag).IsEmpty()) {
      // Output the input as is; FLAG stays silent in this case.
      cc->Outputs()
          .Tag(kOutputTag)
          .AddPacket(cc->Inputs().Tag(kInputValueTag).Value());
    } else {
      // Output the default, retimed to the current tick:
      cc->Outputs()
          .Tag(kOutputTag)
          .AddPacket(default_.At(cc->InputTimestamp()));
      // Signal that the default was used. MakePacket avoids the raw owning
      // `new` that Add(ptr, ts) requires.
      cc->Outputs()
          .Tag(kIndicationTag)
          .AddPacket(
              mediapipe::MakePacket<bool>(true).At(cc->InputTimestamp()));
    }
    return mediapipe::OkStatus();
  }

 private:
  // The default value to replicate every time there is no new value.
  mediapipe::Packet default_;
};

REGISTER_CALCULATOR(ValueOrDefaultCalculator);
} // namespace mediapipe

View File

@ -0,0 +1,240 @@
#include <algorithm>
#include <cstdint>
#include <vector>
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
using ::testing::AllOf;
using ::testing::ContainerEq;
using ::testing::Each;
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
using ::testing::Test;
const int kDefaultValue = 0;
// Utility to create a mediapipe graph runner with the tested calculator and a
// default value, shared by all the tests.
class ValueOrDefaultRunner : public mediapipe::CalculatorRunner {
public:
ValueOrDefaultRunner()
: mediapipe::CalculatorRunner(R"pb(
calculator: "ValueOrDefaultCalculator"
input_stream: "IN:in"
input_stream: "TICK:tick"
input_side_packet: "default"
output_stream: "OUT:out"
output_stream: "FLAG:used_default"
)pb") {
MutableSidePackets()->Index(0) = mediapipe::MakePacket<int>(kDefaultValue);
}
// Utility to push inputs to the runner to the TICK stream, so we could easily
// tick.
void TickAt(int64_t time) {
// The type or value of the stream isn't relevant, we use just a bool.
MutableInputs()->Tag("TICK").packets.push_back(
mediapipe::Adopt(new bool(false)).At(mediapipe::Timestamp(time)));
}
// Utility to push the real inputs to the runner (IN stream).
void ProvideInput(int64_t time, int value) {
MutableInputs()->Tag("IN").packets.push_back(
mediapipe::Adopt(new int(value)).At(mediapipe::Timestamp(time)));
}
// Extracts the timestamps (as int64) of the output stream of the calculator.
std::vector<int64_t> GetOutputTimestamps() const {
std::vector<int64_t> timestamps;
for (const mediapipe::Packet& packet : Outputs().Tag("OUT").packets) {
timestamps.emplace_back(packet.Timestamp().Value());
}
return timestamps;
}
// Extracts the values from the output stream of the calculator.
std::vector<int> GetOutputValues() const {
std::vector<int> values;
for (const mediapipe::Packet& packet : Outputs().Tag("OUT").packets) {
values.emplace_back(packet.Get<int>());
}
return values;
}
// Extracts the timestamps (as int64) of the flag stream, which indicates on
// times without an input value (i.e. using the default value).
std::vector<int64_t> GetFlagTimestamps() const {
std::vector<int64_t> timestamps;
for (const mediapipe::Packet& packet : Outputs().Tag("FLAG").packets) {
timestamps.emplace_back(packet.Timestamp().Value());
}
return timestamps;
}
// Extracts the output from the flags stream (which should always be true).
std::vector<bool> GetFlagValues() const {
std::vector<bool> flags;
for (const mediapipe::Packet& packet : Outputs().Tag("FLAG").packets) {
flags.emplace_back(packet.Get<bool>());
}
return flags;
}
};
// To be used as input values:
// Produces `size` consecutive integers beginning one above the default value,
// so the generated inputs can never be mistaken for kDefaultValue.
std::vector<int> GetIntegersRange(int size) {
  std::vector<int> values;
  values.reserve(size);
  for (int offset = 1; offset <= size; ++offset) {
    values.push_back(kDefaultValue + offset);
  }
  return values;
}
TEST(ValueOrDefaultCalculatorTest, NoInputs) {
  // With no IN packets at all, every tick must produce the default value and
  // a FLAG packet, both stamped with that tick's timestamp.
  ValueOrDefaultRunner runner;
  const std::vector<int64_t> tick_times = {0, 1, 2, 5, 8, 12, 33, 231};
  for (const int64_t tick_time : tick_times) {
    runner.TickAt(tick_time);
  }
  MP_EXPECT_OK(runner.Run());
  // One output per tick, at the tick's timestamp:
  EXPECT_THAT(runner.GetOutputTimestamps(), ContainerEq(tick_times));
  // Every output carries the default value:
  EXPECT_THAT(runner.GetOutputValues(),
              AllOf(Each(kDefaultValue), SizeIs(tick_times.size())));
  // And every tick was flagged as having used the default:
  EXPECT_THAT(runner.GetFlagTimestamps(), ContainerEq(tick_times));
}
TEST(ValueOrDefaultCalculatorTest, NeverDefault) {
  // When every tick arrives together with an IN packet, the calculator must
  // pass the inputs through untouched and never emit a FLAG packet.
  ValueOrDefaultRunner runner;
  const std::vector<int64_t> tick_times = {0, 1, 2, 5, 8, 12, 33, 231};
  const std::vector<int> provided = GetIntegersRange(tick_times.size());
  for (size_t idx = 0; idx < tick_times.size(); ++idx) {
    runner.TickAt(tick_times[idx]);
    runner.ProvideInput(tick_times[idx], provided[idx]);
  }
  MP_EXPECT_OK(runner.Run());
  // Outputs appear at exactly the tick timestamps:
  EXPECT_THAT(runner.GetOutputTimestamps(), ContainerEq(tick_times));
  // ...and carry the provided values verbatim:
  EXPECT_THAT(runner.GetOutputValues(), ContainerEq(provided));
  // The default was never substituted:
  EXPECT_THAT(runner.GetFlagTimestamps(), IsEmpty());
}
TEST(ValueOrDefaultCalculatorTest, DefaultAndValues) {
  // Typical use case for this calculator: inputs cover only some of the
  // ticks, so the gaps are filled with the default value and flagged.
  ValueOrDefaultRunner runner;
  const std::vector<int64_t> tick_times = {0, 1, 5, 8, 12, 231};
  // Input times chosen so defaults occur before the first input (tick 0),
  // between inputs (tick 8), and after the last input (tick 231).
  const std::vector<int64_t> input_times = {1, 5, 12};
  const std::vector<int> inputs = GetIntegersRange(input_times.size());
  for (const int64_t tick_time : tick_times) {
    runner.TickAt(tick_time);
  }
  for (size_t i = 0; i < input_times.size(); ++i) {
    runner.ProvideInput(input_times[i], inputs[i]);
  }
  MP_EXPECT_OK(runner.Run());
  // Every tick yields an output.
  EXPECT_THAT(runner.GetOutputTimestamps(), ContainerEq(tick_times));
  // The flag fires exactly on the ticks without an input.
  EXPECT_THAT(runner.GetFlagTimestamps(), ElementsAre(0, 8, 231));
  // Defaults at the flagged ticks, the provided inputs everywhere else.
  EXPECT_THAT(
      runner.GetOutputValues(),
      ElementsAre(kDefaultValue, 1, 2, kDefaultValue, 3, kDefaultValue));
}
TEST(ValueOrDefaultCalculatorTest, TimestampsMismatch) {
  // Inputs whose timestamps do not coincide with any tick must be dropped.
  ValueOrDefaultRunner runner;
  const std::vector<int64_t> tick_times = {1, 2, 5, 8, 12, 33, 231};
  // Off-tick timestamps: before the first tick, between ticks, and after the
  // last one. There are also more inputs than ticks.
  const std::vector<int64_t> input_times = {0,  3,  4,  6,  7,  9, 10,
                                            11, 13, 14, 15, 16, 232};
  const std::vector<int> inputs = GetIntegersRange(input_times.size());
  for (const int64_t tick_time : tick_times) {
    runner.TickAt(tick_time);
  }
  for (size_t i = 0; i < input_times.size(); ++i) {
    runner.ProvideInput(input_times[i], inputs[i]);
  }
  MP_EXPECT_OK(runner.Run());
  // None of the off-tick inputs should make it into the output.
  EXPECT_THAT(runner.GetOutputTimestamps(), ContainerEq(tick_times));
  EXPECT_THAT(runner.GetOutputValues(),
              AllOf(SizeIs(tick_times.size()), Each(kDefaultValue)));
  // All ticks (and only the ticks) receive the default indication.
  EXPECT_THAT(runner.GetFlagTimestamps(), ContainerEq(tick_times));
}
TEST(ValueOrDefaultCalculatorTest, FlagValue) {
  // The flag payload is a bool, so there is little to vary; verify once that
  // a single tick without input emits exactly one flag packet holding true.
  ValueOrDefaultRunner runner;
  runner.TickAt(0);
  MP_EXPECT_OK(runner.Run());
  EXPECT_THAT(runner.GetFlagValues(), AllOf(SizeIs(1), Each(true)));
}
TEST(ValueOrDefaultCalculatorTest, FullTest) {
  // End-to-end sanity check: a mixture of matching and mismatching input
  // timestamps, with defaults filling the uncovered ticks.
  ValueOrDefaultRunner runner;
  const std::vector<int64_t> tick_times = {1, 2, 5, 8, 12, 33, 231};
  const std::vector<int64_t> input_times = {0, 2, 4, 6, 8, 9, 12, 33, 54, 232};
  const std::vector<int> inputs = GetIntegersRange(input_times.size());
  for (const int64_t tick_time : tick_times) {
    runner.TickAt(tick_time);
  }
  for (size_t i = 0; i < input_times.size(); ++i) {
    runner.ProvideInput(input_times[i], inputs[i]);
  }
  MP_EXPECT_OK(runner.Run());
  EXPECT_THAT(runner.GetOutputTimestamps(), ContainerEq(tick_times));
  // Expected values worked out by hand from tick/input alignment.
  EXPECT_THAT(
      runner.GetOutputValues(),
      ElementsAre(kDefaultValue, 2, kDefaultValue, 5, 7, 8, kDefaultValue));
  // Ticks 1, 5 and 231 have no matching input, hence get the default.
  EXPECT_THAT(runner.GetFlagTimestamps(), ElementsAre(1, 5, 231));
  EXPECT_THAT(runner.GetFlagValues(), AllOf(SizeIs(3), Each(true)));
}
} // namespace
} // namespace mediapipe

View File

@ -301,9 +301,11 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"//mediapipe/gpu:multi_pool",
"//third_party:opencv",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
@ -786,6 +788,7 @@ cc_library(
":affine_transformation_runner_gl",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_service",
],
}) + select({
"//mediapipe/framework/port:disable_opencv": [],

View File

@ -223,7 +223,7 @@ class GlTextureWarpAffineRunner
absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
interpolation_def, kFragShader);
ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));
MP_ASSIGN_OR_RETURN(program_, create_fn(vert_src, frag_src));
auto create_custom_zero_fn = [&]() -> absl::StatusOr<Program> {
std::string custom_zero_border_mode_def = R"(
@ -236,10 +236,10 @@ class GlTextureWarpAffineRunner
};
#if GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
if (!IsGlClampToBorderSupported(gl_helper_->GetGlContext())) {
ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
MP_ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
}
#else
ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
MP_ASSIGN_OR_RETURN(program_custom_zero_, create_custom_zero_fn());
#endif // GL_CLAMP_TO_BORDER_MAY_BE_SUPPORTED
glGenFramebuffers(1, &framebuffer_);

View File

@ -59,7 +59,7 @@ class OpenCvRunner
const ImageFrame& input, const std::array<float, 16>& matrix,
const AffineTransformation::Size& size,
AffineTransformation::BorderMode border_mode) override {
// OpenCV warpAffine works in absolute coordinates, so the transfom (which
// OpenCV warpAffine works in absolute coordinates, so the transform (which
// accepts and produces relative coordinates) should be adjusted to first
// normalize coordinates and then scale them.
// clang-format off

View File

@ -64,7 +64,8 @@ class ImageCloneCalculator : public Node {
"GPU processing is disabled in build flags");
}
#else
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(
cc, /*request_gpu_as_optional=*/true));
#endif // MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
@ -72,9 +73,6 @@ class ImageCloneCalculator : public Node {
absl::Status Open(CalculatorContext* cc) override {
const auto& options = cc->Options<mediapipe::ImageCloneCalculatorOptions>();
output_on_gpu_ = options.output_on_gpu();
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
@ -104,6 +102,10 @@ class ImageCloneCalculator : public Node {
if (output_on_gpu_ && !input_on_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
if (!gpu_initialized_) {
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
gpu_initialized_ = true;
}
gpu_helper_.RunInGlContext([&output]() { output->ConvertToGpu(); });
#endif // !MEDIAPIPE_DISABLE_GPU
} else if (!output_on_gpu_ && input_on_gpu) {
@ -118,6 +120,7 @@ class ImageCloneCalculator : public Node {
bool output_on_gpu_;
#if !MEDIAPIPE_DISABLE_GPU
mediapipe::GlCalculatorHelper gpu_helper_;
bool gpu_initialized_ = false;
#endif // !MEDIAPIPE_DISABLE_GPU
};
MEDIAPIPE_REGISTER_NODE(ImageCloneCalculator);

View File

@ -24,7 +24,7 @@ message ImageCroppingCalculatorOptions {
}
// Output texture buffer dimensions. The values defined in the options will be
// overriden by the WIDTH and HEIGHT input streams if they exist.
// overridden by the WIDTH and HEIGHT input streams if they exist.
optional int32 width = 1;
optional int32 height = 2;

View File

@ -77,7 +77,7 @@ absl::StatusOr<double> ComputeFocalLengthInPixels(int image_width,
return focal_length_pixels;
}
absl::StatusOr<ImageFileProperties> GetImageFileProperites(
absl::StatusOr<ImageFileProperties> GetImageFileProperties(
const std::string& image_bytes) {
easyexif::EXIFInfo result;
int code = result.parseFrom(image_bytes);
@ -92,11 +92,11 @@ absl::StatusOr<ImageFileProperties> GetImageFileProperites(
properties.set_focal_length_mm(result.FocalLength);
properties.set_focal_length_35mm(result.FocalLengthIn35mm);
ASSIGN_OR_RETURN(auto focal_length_pixels,
ComputeFocalLengthInPixels(properties.image_width(),
properties.image_height(),
properties.focal_length_35mm(),
properties.focal_length_mm()));
MP_ASSIGN_OR_RETURN(auto focal_length_pixels,
ComputeFocalLengthInPixels(properties.image_width(),
properties.image_height(),
properties.focal_length_35mm(),
properties.focal_length_mm()));
properties.set_focal_length_pixels(focal_length_pixels);
return properties;
@ -151,7 +151,7 @@ class ImageFilePropertiesCalculator : public CalculatorBase {
if (cc->InputSidePackets().NumEntries() == 1) {
const std::string& image_bytes =
cc->InputSidePackets().Index(0).Get<std::string>();
ASSIGN_OR_RETURN(properties_, GetImageFileProperites(image_bytes));
MP_ASSIGN_OR_RETURN(properties_, GetImageFileProperties(image_bytes));
read_properties_ = true;
}
@ -169,7 +169,7 @@ class ImageFilePropertiesCalculator : public CalculatorBase {
return absl::OkStatus();
}
const std::string& image_bytes = cc->Inputs().Index(0).Get<std::string>();
ASSIGN_OR_RETURN(properties_, GetImageFileProperites(image_bytes));
MP_ASSIGN_OR_RETURN(properties_, GetImageFileProperties(image_bytes));
read_properties_ = true;
}
if (read_properties_) {

View File

@ -656,6 +656,15 @@ absl::Status ImageTransformationCalculator::RenderGpu(CalculatorContext* cc) {
input.format());
gpu_helper_.BindFramebuffer(dst);
if (scale_mode_ == mediapipe::ScaleMode::FIT) {
// In kFit scale mode, the rendered quad does not fill the whole
// framebuffer, so clear it beforehand.
glClearColor(padding_color_[0] / 255.0f, padding_color_[1] / 255.0f,
padding_color_[2] / 255.0f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT);
}
glActiveTexture(GL_TEXTURE1);
glBindTexture(src1.target(), src1.name());

View File

@ -46,13 +46,14 @@ message ImageTransformationCalculatorOptions {
optional bool flip_horizontally = 5 [default = false];
// Scale mode.
optional ScaleMode.Mode scale_mode = 6;
// Padding type. This option is only used when the scale mode is FIT.
// Default is to use BORDER_CONSTANT. If set to false, it will use
// BORDER_REPLICATE instead.
// Padding type. This option is only used when the scale mode is FIT. If set
// to true (default), a constant border is added with color specified by
// padding_color. If set to false, a border is added by replicating edge
// pixels (only supported for CPU).
optional bool constant_padding = 7 [default = true];
// The color for the padding. This option is only used when the scale mode is
// FIT. Default is black. This is for CPU only.
// FIT. Default is black.
optional Color padding_color = 8;
// Interpolation method to use. Note that on CPU when LINEAR is specified,

View File

@ -1,9 +1,11 @@
#include <algorithm>
#include <string>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/flags/flag.h"
#include "absl/log/absl_check.h"
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
@ -16,10 +18,14 @@
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/gpu/multi_pool.h"
#include "testing/base/public/gmock.h"
#include "testing/base/public/googletest.h"
#include "testing/base/public/gunit.h"
#include "third_party/OpenCV/core.hpp" // IWYU pragma: keep
#include "third_party/OpenCV/core/base.hpp"
#include "third_party/OpenCV/core/mat.hpp"
#include "third_party/OpenCV/core/types.hpp"
namespace mediapipe {
@ -76,11 +82,12 @@ TEST(ImageTransformationCalculatorTest, NearestNeighborResizing) {
->Tag("OUTPUT_DIMENSIONS")
.packets.push_back(input_output_dim_packet.At(Timestamp(0)));
MP_ASSERT_OK(runner.Run());
ABSL_QCHECK_OK(runner.Run());
const auto& outputs = runner.Outputs();
ASSERT_EQ(outputs.NumEntries(), 1);
ABSL_QCHECK_EQ(outputs.NumEntries(), 1);
const std::vector<Packet>& packets = outputs.Tag("IMAGE").packets;
ASSERT_EQ(packets.size(), 1);
ABSL_QCHECK_EQ(packets.size(), 1);
const auto& result = packets[0].Get<ImageFrame>();
ASSERT_EQ(output_dim.first, result.Width());
ASSERT_EQ(output_dim.second, result.Height());
@ -137,11 +144,12 @@ TEST(ImageTransformationCalculatorTest,
->Tag("OUTPUT_DIMENSIONS")
.packets.push_back(input_output_dim_packet.At(Timestamp(0)));
MP_ASSERT_OK(runner.Run());
ABSL_QCHECK_OK(runner.Run());
const auto& outputs = runner.Outputs();
ASSERT_EQ(outputs.NumEntries(), 1);
ABSL_QCHECK_EQ(outputs.NumEntries(), 1);
const std::vector<Packet>& packets = outputs.Tag("IMAGE").packets;
ASSERT_EQ(packets.size(), 1);
ABSL_QCHECK_EQ(packets.size(), 1);
const auto& result = packets[0].Get<ImageFrame>();
ASSERT_EQ(output_dim.first, result.Width());
ASSERT_EQ(output_dim.second, result.Height());
@ -207,17 +215,17 @@ TEST(ImageTransformationCalculatorTest, NearestNeighborResizingGpu) {
tool::AddVectorSink("output_image", &graph_config, &output_image_packets);
CalculatorGraph graph(graph_config);
MP_ASSERT_OK(graph.StartRun({}));
ABSL_QCHECK_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
ABSL_QCHECK_OK(graph.AddPacketToInputStream(
"input_image",
MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
ABSL_QCHECK_OK(graph.AddPacketToInputStream(
"image_size",
MakePacket<std::pair<int, int>>(output_dim).At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_THAT(output_image_packets, testing::SizeIs(1));
ABSL_QCHECK_OK(graph.WaitUntilIdle());
ABSL_QCHECK_EQ(output_image_packets.size(), 1);
const auto& output_image = output_image_packets[0].Get<ImageFrame>();
ASSERT_EQ(output_dim.first, output_image.Width());
@ -287,16 +295,16 @@ TEST(ImageTransformationCalculatorTest,
tool::AddVectorSink("output_image", &graph_config, &output_image_packets);
CalculatorGraph graph(graph_config);
MP_ASSERT_OK(graph.StartRun({}));
ABSL_QCHECK_OK(graph.StartRun({}));
MP_ASSERT_OK(graph.AddPacketToInputStream(
ABSL_QCHECK_OK(graph.AddPacketToInputStream(
"input_image", input_image_packet.At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
ABSL_QCHECK_OK(graph.AddPacketToInputStream(
"image_size",
MakePacket<std::pair<int, int>>(output_dim).At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_THAT(output_image_packets, testing::SizeIs(1));
ABSL_QCHECK_OK(graph.WaitUntilIdle());
ABSL_QCHECK_EQ(output_image_packets.size(), 1);
const auto& output_image = output_image_packets[0].Get<ImageFrame>();
ASSERT_EQ(output_dim.first, output_image.Width());
@ -311,5 +319,112 @@ TEST(ImageTransformationCalculatorTest,
}
}
TEST(ImageTransformationCalculatorTest, FitScalingClearsBackground) {
// Regression test for not clearing the background in FIT scaling mode.
// First scale an all-red (=r) image from 8x4 to 8x4, so it's a plain copy:
// rrrrrrrr
// rrrrrrrr
// rrrrrrrr
// rrrrrrrr
// Then scale an all-blue image from 4x4 to 8x4 in FIT mode. This should
// introduce dark yellow (=y) letterboxes left and right due to padding_color:
// yybbbbyy
// yybbbbyy
// yybbbbyy
// yybbbbyy
// We make sure that the all-red buffer gets reused. Without clearing the
// background, the blue (=b) image will have red letterboxes:
// rrbbbbrr
// rrbbbbrr
// rrbbbbrr
// rrbbbbrr
constexpr int kSmall = 4, kLarge = 8;
ImageFrame input_image_red(ImageFormat::SRGBA, kLarge, kSmall);
cv::Mat input_image_red_mat = formats::MatView(&input_image_red);
input_image_red_mat = cv::Scalar(255, 0, 0, 255);
ImageFrame input_image_blue(ImageFormat::SRGBA, kSmall, kSmall);
cv::Mat input_image_blue_mat = formats::MatView(&input_image_blue);
input_image_blue_mat = cv::Scalar(0, 0, 255, 255);
Packet input_image_red_packet =
MakePacket<ImageFrame>(std::move(input_image_red));
Packet input_image_blue_packet =
MakePacket<ImageFrame>(std::move(input_image_blue));
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
R"pb(
input_stream: "input_image"
output_stream: "output_image"
node {
calculator: "ImageFrameToGpuBufferCalculator"
input_stream: "input_image"
output_stream: "input_image_gpu"
}
node {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_image_gpu"
output_stream: "IMAGE_GPU:output_image_gpu"
options: {
[mediapipe.ImageTransformationCalculatorOptions.ext]: {
scale_mode: FIT
output_width: $0,
output_height: $1,
padding_color: { red: 128, green: 128, blue: 0 }
}
}
}
node {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "output_image_gpu"
output_stream: "output_image"
})pb",
kLarge, kSmall));
std::vector<Packet> output_image_packets;
tool::AddVectorSink("output_image", &graph_config, &output_image_packets);
CalculatorGraph graph(graph_config);
ABSL_QCHECK_OK(graph.StartRun({}));
// Send the red image multiple times to cause the GPU pool to actually use
// a pool.
int num_red_packets =
std::max(kDefaultMultiPoolOptions.min_requests_before_pool, 1);
for (int n = 0; n < num_red_packets; ++n) {
ABSL_QCHECK_OK(graph.AddPacketToInputStream(
"input_image", input_image_red_packet.At(Timestamp(n))));
}
ABSL_QCHECK_OK(graph.AddPacketToInputStream(
"input_image", input_image_blue_packet.At(Timestamp(num_red_packets))));
ABSL_QCHECK_OK(graph.WaitUntilIdle());
ABSL_QCHECK_EQ(output_image_packets.size(), num_red_packets + 1);
const auto& output_image_red = output_image_packets[0].Get<ImageFrame>();
const auto& output_image_blue =
output_image_packets[num_red_packets].Get<ImageFrame>();
ABSL_QCHECK_EQ(output_image_red.Width(), kLarge);
ABSL_QCHECK_EQ(output_image_red.Height(), kSmall);
ABSL_QCHECK_EQ(output_image_blue.Width(), kLarge);
ABSL_QCHECK_EQ(output_image_blue.Height(), kSmall);
cv::Mat output_image_blue_mat = formats::MatView(&output_image_blue);
ImageFrame expected_image_blue(ImageFormat::SRGBA, kLarge, kSmall);
cv::Mat expected_image_blue_mat = formats::MatView(&expected_image_blue);
expected_image_blue_mat = cv::Scalar(128, 128, 0, 255);
cv::Rect rect((kLarge - kSmall) / 2, 0, kSmall, kSmall);
cv::rectangle(expected_image_blue_mat, rect, cv::Scalar(0, 0, 255, 255),
cv::FILLED);
EXPECT_EQ(cv::sum(cv::sum(output_image_blue_mat != expected_image_blue_mat)),
cv::Scalar(0));
}
} // namespace
} // namespace mediapipe

View File

@ -117,7 +117,8 @@ absl::Status SegmentationSmoothingCalculator::GetContract(
cc->Outputs().Tag(kOutputMaskTag).Set<Image>();
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(
cc, /*request_gpu_as_optional=*/true));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
@ -130,10 +131,6 @@ absl::Status SegmentationSmoothingCalculator::Open(CalculatorContext* cc) {
cc->Options<mediapipe::SegmentationSmoothingCalculatorOptions>();
combine_with_previous_ratio_ = options.combine_with_previous_ratio();
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
@ -154,6 +151,9 @@ absl::Status SegmentationSmoothingCalculator::Process(CalculatorContext* cc) {
if (use_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
if (!gpu_initialized_) {
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
}
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
if (!gpu_initialized_) {
MP_RETURN_IF_ERROR(GlSetup(cc));
@ -178,10 +178,12 @@ absl::Status SegmentationSmoothingCalculator::Process(CalculatorContext* cc) {
absl::Status SegmentationSmoothingCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
gpu_helper_.RunInGlContext([this] {
if (program_) glDeleteProgram(program_);
program_ = 0;
});
if (gpu_initialized_) {
gpu_helper_.RunInGlContext([this] {
if (program_) glDeleteProgram(program_);
program_ = 0;
});
}
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();

View File

@ -36,6 +36,7 @@
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_service.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
@ -79,8 +80,8 @@ class WarpAffineRunnerHolder<ImageFrame> {
}
absl::StatusOr<RunnerType*> GetRunner() {
if (!runner_) {
ASSIGN_OR_RETURN(runner_,
CreateAffineTransformationOpenCvRunner(interpolation_));
MP_ASSIGN_OR_RETURN(
runner_, CreateAffineTransformationOpenCvRunner(interpolation_));
}
return runner_.get();
}
@ -106,10 +107,12 @@ class WarpAffineRunnerHolder<mediapipe::GpuBuffer> {
cc->Options<mediapipe::WarpAffineCalculatorOptions>().interpolation());
return gl_helper_->Open(cc);
}
absl::StatusOr<RunnerType*> GetRunner() {
if (!runner_) {
ASSIGN_OR_RETURN(runner_, CreateAffineTransformationGlRunner(
gl_helper_, gpu_origin_, interpolation_));
MP_ASSIGN_OR_RETURN(
runner_, CreateAffineTransformationGlRunner(gl_helper_, gpu_origin_,
interpolation_));
}
return runner_.get();
}
@ -141,7 +144,10 @@ class WarpAffineRunnerHolder<mediapipe::Image> {
MP_RETURN_IF_ERROR(cpu_holder_.Open(cc));
#endif // !MEDIAPIPE_DISABLE_OPENCV
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
if (cc->Service(kGpuService).IsAvailable()) {
MP_RETURN_IF_ERROR(gpu_holder_.Open(cc));
gpu_holder_initialized_ = true;
}
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
@ -151,24 +157,28 @@ class WarpAffineRunnerHolder<mediapipe::Image> {
AffineTransformation::BorderMode border_mode) override {
if (input.UsesGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
ASSIGN_OR_RETURN(auto result, runner->Run(input.GetGpuBuffer(), matrix,
size, border_mode));
if (!gpu_holder_initialized_) {
return absl::UnavailableError("GPU support is not available");
}
MP_ASSIGN_OR_RETURN(auto* runner, gpu_holder_.GetRunner());
MP_ASSIGN_OR_RETURN(
auto result,
runner->Run(input.GetGpuBuffer(), matrix, size, border_mode));
return mediapipe::Image(*result);
#else
return absl::UnavailableError("GPU support is disabled");
#endif // !MEDIAPIPE_DISABLE_GPU
}
#if !MEDIAPIPE_DISABLE_OPENCV
ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
MP_ASSIGN_OR_RETURN(auto* runner, cpu_holder_.GetRunner());
const auto& frame_ptr = input.GetImageFrameSharedPtr();
// Wrap image into image frame.
const ImageFrame image_frame(frame_ptr->Format(), frame_ptr->Width(),
frame_ptr->Height(), frame_ptr->WidthStep(),
const_cast<uint8_t*>(frame_ptr->PixelData()),
[](uint8_t* data){});
ASSIGN_OR_RETURN(auto result,
runner->Run(image_frame, matrix, size, border_mode));
MP_ASSIGN_OR_RETURN(auto result,
runner->Run(image_frame, matrix, size, border_mode));
return mediapipe::Image(std::make_shared<ImageFrame>(std::move(result)));
#else
return absl::UnavailableError("OpenCV support is disabled");
@ -181,6 +191,7 @@ class WarpAffineRunnerHolder<mediapipe::Image> {
#endif // !MEDIAPIPE_DISABLE_OPENCV
#if !MEDIAPIPE_DISABLE_GPU
WarpAffineRunnerHolder<mediapipe::GpuBuffer> gpu_holder_;
bool gpu_holder_initialized_ = false;
#endif // !MEDIAPIPE_DISABLE_GPU
};
@ -194,27 +205,31 @@ class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
static absl::Status UpdateContract(CalculatorContract* cc) {
if constexpr (std::is_same_v<InterfaceT, WarpAffineCalculatorGpu> ||
std::is_same_v<InterfaceT, WarpAffineCalculator>) {
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(
cc, /*request_gpu_as_optional=*/true));
}
return absl::OkStatus();
}
#endif // !MEDIAPIPE_DISABLE_GPU
absl::Status Open(CalculatorContext* cc) override { return holder_.Open(cc); }
absl::Status Process(CalculatorContext* cc) override {
if (InterfaceT::kInImage(cc).IsEmpty() ||
InterfaceT::kMatrix(cc).IsEmpty() ||
InterfaceT::kOutputSize(cc).IsEmpty()) {
return absl::OkStatus();
}
if (!holder_initialized_) {
MP_RETURN_IF_ERROR(holder_.Open(cc));
holder_initialized_ = true;
}
const std::array<float, 16>& transform = *InterfaceT::kMatrix(cc);
auto [out_width, out_height] = *InterfaceT::kOutputSize(cc);
AffineTransformation::Size output_size;
output_size.width = out_width;
output_size.height = out_height;
ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(auto* runner, holder_.GetRunner());
MP_ASSIGN_OR_RETURN(
auto result,
runner->Run(
*InterfaceT::kInImage(cc), transform, output_size,
@ -228,6 +243,7 @@ class WarpAffineCalculatorImpl : public mediapipe::api2::NodeImpl<InterfaceT> {
private:
WarpAffineRunnerHolder<typename decltype(InterfaceT::kInImage)::PayloadT>
holder_;
bool holder_initialized_ = false;
};
} // namespace

View File

@ -284,7 +284,7 @@ std::array<float, 16> GetMatrix(cv::Mat input, mediapipe::NormalizedRect roi,
.IgnoreError();
mediapipe::GetRotatedSubRectToRectTransformMatrix(
roi_absolute, input.cols, input.rows,
/*flip_horizontaly=*/false, &transform_mat);
/*flip_horizontally=*/false, &transform_mat);
return transform_mat;
}

View File

@ -49,7 +49,7 @@ std::string FourCCToString(libyuv::FourCC fourcc) {
// The input `YUVImage` is expected to be in the NV12, NV21, YV12 or I420 (aka
// YV21) format (as per the `fourcc()` property). This covers the most commonly
// used YUV image formats used on mobile devices. Other formats are not
// supported and wil result in an `InvalidArgumentError`.
// supported and will result in an `InvalidArgumentError`.
class YUVToImageCalculator : public Node {
public:
static constexpr Input<YUVImage> kInput{"YUV_IMAGE"};

View File

@ -13,16 +13,16 @@
# limitations under the License.
#
load("@bazel_skylib//lib:selects.bzl", "selects")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test")
load("@bazel_skylib//lib:selects.bzl", "selects")
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_library_with_tflite")
load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test")
load("//mediapipe/framework:more_selects.bzl", "more_selects")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
load("@org_tensorflow//tensorflow/lite/core/shims:cc_library_with_tflite.bzl", "cc_library_with_tflite")
licenses(["notice"])
@ -50,10 +50,18 @@ more_selects.config_setting_negation(
)
selects.config_setting_group(
name = "platform_ios_with_gpu",
name = "platform_apple_with_gpu",
match_all = [
":not_disable_gpu",
"//mediapipe:ios",
"//mediapipe:apple",
],
)
selects.config_setting_group(
name = "platform_apple_without_gpu",
match_all = [
":disable_gpu",
"//mediapipe:apple",
],
)
@ -254,7 +262,7 @@ cc_test(
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep
"@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep
],
)
@ -303,7 +311,7 @@ cc_test(
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_sentencepiece//src:sentencepiece_processor", # fixdeps: keep
"@com_google_sentencepiece//:sentencepiece_processor", # fixdeps: keep
],
)
@ -614,7 +622,7 @@ cc_library(
":inference_calculator_interface",
] + select({
"//conditions:default": [":inference_calculator_gl_if_compute_shader_available"],
":platform_ios_with_gpu": [":inference_calculator_metal"],
":platform_apple_with_gpu": [":inference_calculator_metal"],
}),
alwayslink = 1,
)
@ -649,6 +657,7 @@ cc_library(
}),
deps = [
":tensor_converter_calculator_cc_proto",
":tensor_converter_cpu",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:port",
"//mediapipe/framework/formats:image_frame",
@ -657,6 +666,7 @@ cc_library(
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_buffer_format",
"//mediapipe/gpu:gpu_origin_cc_proto",
"//mediapipe/util:resource_util",
@ -666,10 +676,17 @@ cc_library(
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": ["tensor_converter_calculator_gpu_deps"],
"//conditions:default": [
"tensor_converter_calculator_gpu_deps",
"//mediapipe/gpu:gl_base",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:shader_util",
],
}) + select({
"//mediapipe:apple": [
"//third_party/apple_frameworks:MetalKit",
@ -679,6 +696,35 @@ cc_library(
alwayslink = 1,
)
cc_library(
name = "tensor_converter_cpu",
srcs = ["tensor_converter_cpu.cc"],
hdrs = ["tensor_converter_cpu.h"],
deps = [
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
],
)
cc_test(
name = "tensor_converter_cpu_test",
srcs = ["tensor_converter_cpu_test.cc"],
deps = [
":tensor_converter_cpu",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:status_matchers",
"//mediapipe/util:image_test_utils",
],
)
cc_library(
name = "tensor_converter_calculator_gpu_deps",
visibility = ["//visibility:private"],
@ -687,12 +733,13 @@ cc_library(
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
],
"//mediapipe:ios": [
":platform_apple_with_gpu": [
"//mediapipe/gpu:MPPMetalHelper",
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/objc:mediapipe_framework_ios",
],
"//mediapipe:macos": [],
# This setting is needed to allow bazel to build all targets on Mac with GPU disabled
":platform_apple_without_gpu": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
@ -777,11 +824,12 @@ cc_library(
name = "tensors_to_detections_calculator_gpu_deps",
visibility = ["//visibility:private"],
deps = select({
"//mediapipe:ios": [
":platform_apple_with_gpu": [
"//mediapipe/gpu:MPPMetalHelper",
"//mediapipe/gpu:MPPMetalUtil",
],
"//mediapipe:macos": [],
# This setting is needed to allow bazel to build all targets on Mac with GPU disabled
":platform_apple_without_gpu": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
],
@ -980,6 +1028,48 @@ cc_test(
],
)
cc_library(
name = "tensor_to_joints_calculator",
srcs = ["tensor_to_joints_calculator.cc"],
hdrs = ["tensor_to_joints_calculator.h"],
deps = [
":tensor_to_joints_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:body_rig_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
mediapipe_proto_library(
name = "tensor_to_joints_calculator_proto",
srcs = ["tensor_to_joints_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_test(
name = "tensor_to_joints_calculator_test",
srcs = ["tensor_to_joints_calculator_test.cc"],
deps = [
":tensor_to_joints_calculator",
":tensor_to_joints_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:body_rig_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "image_to_tensor_calculator",
srcs = ["image_to_tensor_calculator.cc"],
@ -1362,6 +1452,8 @@ cc_library(
}),
deps = [
":tensors_to_segmentation_calculator_cc_proto",
":tensors_to_segmentation_converter",
":tensors_to_segmentation_utils",
"//mediapipe/framework:calculator_context",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:port",
@ -1369,9 +1461,11 @@ cc_library(
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gpu_origin_cc_proto",
"//mediapipe/util:resource_util",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
@ -1382,11 +1476,12 @@ cc_library(
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_buffer_format",
"//mediapipe/gpu:shader_util",
],
}) + selects.with_or({
":gpu_inference_disabled": [],
"//mediapipe:ios": [
":platform_apple_with_gpu": [
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:MPPMetalHelper",
"//third_party/apple_frameworks:MetalKit",
@ -1401,13 +1496,109 @@ cc_library(
}) + select({
"//mediapipe/framework/port:disable_opencv": [],
"//conditions:default": [
"//mediapipe/framework/formats:image_opencv",
"//mediapipe/framework/port:opencv_imgproc",
":tensors_to_segmentation_converter_opencv",
],
}),
alwayslink = 1,
)
cc_library(
name = "tensors_to_segmentation_utils",
srcs = ["tensors_to_segmentation_utils.cc"],
hdrs = ["tensors_to_segmentation_utils.h"],
deps = [
"//mediapipe/framework:port",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/status:statusor",
],
)
cc_test(
name = "tensors_to_segmentation_utils_test",
srcs = ["tensors_to_segmentation_utils_test.cc"],
deps = [
":tensors_to_segmentation_utils",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:status_matchers",
"@com_google_absl//absl/status:statusor",
],
)
cc_library(
name = "tensors_to_segmentation_converter",
hdrs = ["tensors_to_segmentation_converter.h"],
deps = [
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:tensor",
"@com_google_absl//absl/status:statusor",
],
)
cc_library(
name = "tensors_to_segmentation_converter_opencv",
srcs = ["tensors_to_segmentation_converter_opencv.cc"],
hdrs = ["tensors_to_segmentation_converter_opencv.h"],
deps = [
":tensors_to_segmentation_calculator_cc_proto",
":tensors_to_segmentation_converter",
":tensors_to_segmentation_utils",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_opencv",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
],
)
cc_library(
name = "tensors_to_segmentation_calculator_test_utils",
testonly = 1,
srcs = ["tensors_to_segmentation_calculator_test_utils.cc"],
hdrs = ["tensors_to_segmentation_calculator_test_utils.h"],
deps = [
":tensors_to_segmentation_calculator_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/framework/port:parse_text_proto",
"@com_google_absl//absl/log:absl_log",
"@com_google_absl//absl/strings",
],
)
cc_test(
name = "tensors_to_segmentation_calculator_test_utils_test",
srcs = ["tensors_to_segmentation_calculator_test_utils_test.cc"],
deps = [
":tensors_to_segmentation_calculator_cc_proto",
":tensors_to_segmentation_calculator_test_utils",
"//mediapipe/framework/port:gtest_main",
],
)
cc_test(
name = "tensors_to_segmentation_calculator_test",
srcs = ["tensors_to_segmentation_calculator_test.cc"],
deps = [
":tensors_to_segmentation_calculator",
":tensors_to_segmentation_calculator_cc_proto",
":tensors_to_segmentation_calculator_test_utils",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework:packet",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_opencv",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
],
)
cc_library(
name = "tensors_dequantization_calculator",
srcs = ["tensors_dequantization_calculator.cc"],

View File

@ -109,7 +109,7 @@ bool IsValidFftSize(int size) {
// Non-streaming mode: when "stream_mode" is set to false in the calculator
// options, the calculators treats the packets in the input audio stream as
// a batch of unrelated audio buffers. In each Process() call, the input
// buffer will be frist resampled, and framed as fixed-sized, possibly
// buffer will be first resampled, and framed as fixed-sized, possibly
// overlapping tensors. The last tensor produced by a Process() invocation
// will be zero-padding if the remaining samples are insufficient. As the
// calculator treats the input packets as unrelated, all samples will be
@ -159,7 +159,7 @@ class AudioToTensorCalculator : public Node {
public:
static constexpr Input<Matrix> kAudioIn{"AUDIO"};
// TODO: Removes this optional input stream when the "AUDIO" stream
// uses the new mediapipe audio data containers that carry audio metatdata,
// uses the new mediapipe audio data containers that carry audio metadata,
// such as sample rate.
static constexpr Input<double>::Optional kAudioSampleRateIn{"SAMPLE_RATE"};
static constexpr Output<std::vector<Tensor>> kTensorsOut{"TENSORS"};
@ -517,8 +517,8 @@ absl::Status AudioToTensorCalculator::OutputTensor(const Matrix& block,
// The last two elements are Nyquist component.
fft_output_matrix(fft_size_ - 2) = fft_output_[1]; // Nyquist real part
fft_output_matrix(fft_size_ - 1) = 0.0f; // Nyquist imagery part
ASSIGN_OR_RETURN(output_tensor, ConvertToTensor(fft_output_matrix,
{2, fft_size_ / 2}));
MP_ASSIGN_OR_RETURN(output_tensor, ConvertToTensor(fft_output_matrix,
{2, fft_size_ / 2}));
break;
}
case Options::WITH_DC_AND_NYQUIST: {
@ -529,7 +529,7 @@ absl::Status AudioToTensorCalculator::OutputTensor(const Matrix& block,
// The last two elements are Nyquist component.
fft_output_matrix(fft_size_) = fft_output_[1]; // Nyquist real part
fft_output_matrix(fft_size_ + 1) = 0.0f; // Nyquist imagery part
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
output_tensor,
ConvertToTensor(fft_output_matrix, {2, (fft_size_ + 2) / 2}));
break;
@ -537,7 +537,7 @@ absl::Status AudioToTensorCalculator::OutputTensor(const Matrix& block,
case Options::WITHOUT_DC_AND_NYQUIST: {
Matrix fft_output_matrix =
Eigen::Map<const Matrix>(fft_output_.data() + 2, 1, fft_size_ - 2);
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
output_tensor,
ConvertToTensor(fft_output_matrix, {2, (fft_size_ - 2) / 2}));
break;
@ -547,8 +547,8 @@ absl::Status AudioToTensorCalculator::OutputTensor(const Matrix& block,
}
} else {
ASSIGN_OR_RETURN(output_tensor,
ConvertToTensor(block, {num_channels_, num_samples_}));
MP_ASSIGN_OR_RETURN(output_tensor,
ConvertToTensor(block, {num_channels_, num_samples_}));
}
kTensorsOut(cc).Send(std::move(output_tensor), timestamp);
return absl::OkStatus();

View File

@ -37,7 +37,7 @@ message AudioToTensorCalculatorOptions {
// will be converted into tensors.
optional double target_sample_rate = 4;
// Whether to treat the input audio stream as a continous stream or a batch
// Whether to treat the input audio stream as a continuous stream or a batch
// of unrelated audio buffers.
optional bool stream_mode = 5 [default = true];

View File

@ -68,7 +68,6 @@ constexpr absl::string_view kSeparatorToken = "[SEP]";
//
// This calculator is currently configured for the TextClassifier Task but it
// will eventually be generalized for other Text Tasks.
// TODO: Handle preprocessing for other Text Tasks too.
//
// Inputs:
// TEXT - std::string
@ -161,9 +160,9 @@ absl::Status BertPreprocessorCalculator::Open(CalculatorContext* cc) {
&kMetadataExtractorSideIn(cc).Get();
const tflite::ProcessUnit* tokenizer_metadata =
metadata_extractor->GetInputProcessUnit(kTokenizerProcessUnitIndex);
ASSIGN_OR_RETURN(tokenizer_,
tasks::text::tokenizers::CreateTokenizerFromProcessUnit(
tokenizer_metadata, metadata_extractor));
MP_ASSIGN_OR_RETURN(tokenizer_,
tasks::text::tokenizers::CreateTokenizerFromProcessUnit(
tokenizer_metadata, metadata_extractor));
auto* input_tensors_metadata = metadata_extractor->GetInputTensorMetadata();
input_ids_tensor_index_ = FindTensorIndexByMetadataName(

View File

@ -67,9 +67,10 @@ absl::StatusOr<std::vector<std::vector<int>>> RunBertPreprocessorCalculator(
tool::AddVectorSink("tensors", &graph_config, &output_packets);
std::string model_buffer = tasks::core::LoadBinaryContent(model_path.data());
ASSIGN_OR_RETURN(std::unique_ptr<ModelMetadataExtractor> metadata_extractor,
ModelMetadataExtractor::CreateFromModelBuffer(
model_buffer.data(), model_buffer.size()));
MP_ASSIGN_OR_RETURN(
std::unique_ptr<ModelMetadataExtractor> metadata_extractor,
ModelMetadataExtractor::CreateFromModelBuffer(model_buffer.data(),
model_buffer.size()));
// Run the graph.
CalculatorGraph graph;
MP_RETURN_IF_ERROR(graph.Initialize(

View File

@ -82,7 +82,7 @@ namespace api2 {
//
// Outputs:
// TENSORS - std::vector<Tensor>
// Vector containing a single Tensor populated with an extrated RGB image.
// Vector containing a single Tensor populated with an extracted RGB image.
// MATRIX - std::array<float, 16> @Optional
// An std::array<float, 16> representing a 4x4 row-major-order matrix that
// maps a point on the input image to a point on the output tensor, and
@ -192,18 +192,19 @@ class ImageToTensorCalculator : public Node {
}
#if MEDIAPIPE_DISABLE_GPU
ASSIGN_OR_RETURN(auto image, GetInputImage(kIn(cc)));
MP_ASSIGN_OR_RETURN(auto image, GetInputImage(kIn(cc)));
#else
const bool is_input_gpu = kInGpu(cc).IsConnected();
ASSIGN_OR_RETURN(auto image, is_input_gpu ? GetInputImage(kInGpu(cc))
: GetInputImage(kIn(cc)));
MP_ASSIGN_OR_RETURN(auto image, is_input_gpu ? GetInputImage(kInGpu(cc))
: GetInputImage(kIn(cc)));
#endif // MEDIAPIPE_DISABLE_GPU
RotatedRect roi = GetRoi(image->width(), image->height(), norm_rect);
const int tensor_width = params_.output_width.value_or(image->width());
const int tensor_height = params_.output_height.value_or(image->height());
ASSIGN_OR_RETURN(auto padding, PadRoi(tensor_width, tensor_height,
options_.keep_aspect_ratio(), &roi));
MP_ASSIGN_OR_RETURN(auto padding,
PadRoi(tensor_width, tensor_height,
options_.keep_aspect_ratio(), &roi));
if (kOutLetterboxPadding(cc).IsConnected()) {
kOutLetterboxPadding(cc).Send(padding);
}
@ -211,7 +212,7 @@ class ImageToTensorCalculator : public Node {
std::array<float, 16> matrix;
GetRotatedSubRectToRectTransformMatrix(
roi, image->width(), image->height(),
/*flip_horizontaly=*/false, &matrix);
/*flip_horizontally=*/false, &matrix);
kOutMatrix(cc).Send(std::move(matrix));
}
@ -247,20 +248,20 @@ class ImageToTensorCalculator : public Node {
if (!gpu_converter_) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
gpu_converter_,
CreateMetalConverter(cc, GetBorderMode(options_.border_mode())));
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
ASSIGN_OR_RETURN(gpu_converter_,
CreateImageToGlBufferTensorConverter(
cc, DoesGpuInputStartAtBottom(options_),
GetBorderMode(options_.border_mode())));
MP_ASSIGN_OR_RETURN(gpu_converter_,
CreateImageToGlBufferTensorConverter(
cc, DoesGpuInputStartAtBottom(options_),
GetBorderMode(options_.border_mode())));
#else
if (!gpu_converter_) {
ASSIGN_OR_RETURN(gpu_converter_,
CreateImageToGlTextureTensorConverter(
cc, DoesGpuInputStartAtBottom(options_),
GetBorderMode(options_.border_mode())));
MP_ASSIGN_OR_RETURN(gpu_converter_,
CreateImageToGlTextureTensorConverter(
cc, DoesGpuInputStartAtBottom(options_),
GetBorderMode(options_.border_mode())));
}
if (!gpu_converter_) {
return absl::UnimplementedError(
@ -272,18 +273,20 @@ class ImageToTensorCalculator : public Node {
} else {
if (!cpu_converter_) {
#if !MEDIAPIPE_DISABLE_OPENCV
ASSIGN_OR_RETURN(cpu_converter_,
CreateOpenCvConverter(
cc, GetBorderMode(options_.border_mode()),
GetOutputTensorType(/*uses_gpu=*/false, params_)));
MP_ASSIGN_OR_RETURN(
cpu_converter_,
CreateOpenCvConverter(
cc, GetBorderMode(options_.border_mode()),
GetOutputTensorType(/*uses_gpu=*/false, params_)));
// TODO: FrameBuffer-based converter needs to call GetGpuBuffer()
// to get access to a FrameBuffer view. Investigate if GetGpuBuffer() can be
// made available even with MEDIAPIPE_DISABLE_GPU set.
#elif MEDIAPIPE_ENABLE_HALIDE
ASSIGN_OR_RETURN(cpu_converter_,
CreateFrameBufferConverter(
cc, GetBorderMode(options_.border_mode()),
GetOutputTensorType(/*uses_gpu=*/false, params_)));
MP_ASSIGN_OR_RETURN(
cpu_converter_,
CreateFrameBufferConverter(
cc, GetBorderMode(options_.border_mode()),
GetOutputTensorType(/*uses_gpu=*/false, params_)));
#else
ABSL_LOG(FATAL) << "Cannot create image to tensor CPU converter since "
"MEDIAPIPE_DISABLE_OPENCV is defined and "

View File

@ -206,7 +206,7 @@ mediapipe::ImageFormat::Format GetImageFormat(int image_channels) {
} else if (image_channels == 1) {
return ImageFormat::GRAY8;
}
ABSL_CHECK(false) << "Unsupported input image channles: " << image_channels;
ABSL_CHECK(false) << "Unsupported input image channels: " << image_channels;
}
Packet MakeImageFramePacket(cv::Mat input) {

View File

@ -175,9 +175,9 @@ absl::Status FrameBufferProcessor::CropRotateResize90Degrees(
cropped_buffer_ = std::make_unique<uint8_t[]>(cropped_buffer_size);
cropped_buffer_size_ = cropped_buffer_size;
}
ASSIGN_OR_RETURN(cropped,
frame_buffer::CreateFromRawBuffer(
cropped_buffer_.get(), cropped_dims, input->format()));
MP_ASSIGN_OR_RETURN(
cropped, frame_buffer::CreateFromRawBuffer(
cropped_buffer_.get(), cropped_dims, input->format()));
}
MP_RETURN_IF_ERROR(
frame_buffer::Crop(*input, left, top, right, bottom, cropped.get()));
@ -194,9 +194,9 @@ absl::Status FrameBufferProcessor::CropRotateResize90Degrees(
rotated_buffer_ = std::make_unique<uint8_t[]>(rotated_buffer_size);
rotated_buffer_size_ = rotated_buffer_size;
}
ASSIGN_OR_RETURN(auto rotated, frame_buffer::CreateFromRawBuffer(
rotated_buffer_.get(), rotated_dims,
cropped->format()));
MP_ASSIGN_OR_RETURN(auto rotated, frame_buffer::CreateFromRawBuffer(
rotated_buffer_.get(), rotated_dims,
cropped->format()));
}
MP_RETURN_IF_ERROR(
frame_buffer::Rotate(*cropped, rotation_degrees, rotated.get()));
@ -217,9 +217,10 @@ absl::Status FrameBufferProcessor::ConvertToFloatTensor(
RET_CHECK(output_tensor.element_type() == Tensor::ElementType::kFloat32);
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 255.0f;
ASSIGN_OR_RETURN(auto transform, GetValueRangeTransformation(
kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
MP_ASSIGN_OR_RETURN(
auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
return frame_buffer::ToFloatTensor(*input_frame, transform.scale,
transform.offset, output_tensor);
}

View File

@ -57,7 +57,7 @@ class SubRectExtractorGl {
absl::Status ExtractSubRectToBuffer(
const tflite::gpu::gl::GlTexture& texture,
const tflite::gpu::HW& texture_size, const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha, float beta,
bool flip_horizontally, float alpha, float beta,
const tflite::gpu::HW& destination_size,
tflite::gpu::gl::CommandQueue* command_queue,
tflite::gpu::gl::GlBuffer* destination);
@ -154,13 +154,13 @@ void main() {
absl::Status SubRectExtractorGl::ExtractSubRectToBuffer(
const tflite::gpu::gl::GlTexture& texture,
const tflite::gpu::HW& texture_size, const RotatedRect& texture_sub_rect,
bool flip_horizontaly, float alpha, float beta,
bool flip_horizontally, float alpha, float beta,
const tflite::gpu::HW& destination_size,
tflite::gpu::gl::CommandQueue* command_queue,
tflite::gpu::gl::GlBuffer* destination) {
std::array<float, 16> transform_mat;
GetRotatedSubRectToRectTransformMatrix(texture_sub_rect, texture_size.w,
texture_size.h, flip_horizontaly,
texture_size.h, flip_horizontally,
&transform_mat);
MP_RETURN_IF_ERROR(texture.BindAsSampler2D(0));
@ -255,7 +255,7 @@ class GlProcessor : public ImageToTensorConverter {
<< "OpenGL ES 3.1 is required.";
command_queue_ = tflite::gpu::gl::NewCommandQueue(gpu_info);
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
auto extractor,
SubRectExtractorGl::Create(gl_helper_.GetGlContext(),
input_starts_at_bottom, border_mode));
@ -293,10 +293,10 @@ class GlProcessor : public ImageToTensorConverter {
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 1.0f;
ASSIGN_OR_RETURN(auto transform,
GetValueRangeTransformation(kInputImageRangeMin,
kInputImageRangeMax,
range_min, range_max));
MP_ASSIGN_OR_RETURN(auto transform,
GetValueRangeTransformation(
kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
const int output_size = output_tensor.bytes() / output_shape.dims[0];
auto buffer_view = output_tensor.GetOpenGlBufferWriteView();
@ -308,7 +308,7 @@ class GlProcessor : public ImageToTensorConverter {
input_texture,
tflite::gpu::HW(source_texture.height(), source_texture.width()),
roi,
/*flip_horizontaly=*/false, transform.scale, transform.offset,
/*flip_horizontally=*/false, transform.scale, transform.offset,
tflite::gpu::HW(output_shape.dims[1], output_shape.dims[2]),
command_queue_.get(), &output));

View File

@ -193,13 +193,13 @@ class GlProcessor : public ImageToTensorConverter {
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 1.0f;
ASSIGN_OR_RETURN(auto transform,
GetValueRangeTransformation(kInputImageRangeMin,
kInputImageRangeMax,
range_min, range_max));
MP_ASSIGN_OR_RETURN(auto transform,
GetValueRangeTransformation(
kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
auto tensor_view = output_tensor.GetOpenGlTexture2dWriteView();
MP_RETURN_IF_ERROR(ExtractSubRect(input_texture, roi,
/*flip_horizontaly=*/false,
/*flip_horizontally=*/false,
transform.scale, transform.offset,
output_shape, &tensor_view));
return absl::OkStatus();
@ -210,7 +210,7 @@ class GlProcessor : public ImageToTensorConverter {
absl::Status ExtractSubRect(const mediapipe::GlTexture& texture,
const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha, float beta,
bool flip_horizontally, float alpha, float beta,
const Tensor::Shape& output_shape,
Tensor::OpenGlTexture2dView* output) {
const int output_height = output_shape.dims[1];
@ -263,13 +263,13 @@ class GlProcessor : public ImageToTensorConverter {
ABSL_LOG_IF(FATAL, !gl_context) << "GlContext is not bound to the thread.";
if (gl_context->GetGlVersion() == mediapipe::GlVersion::kGLES2) {
GetTransposedRotatedSubRectToRectTransformMatrix(
sub_rect, texture.width(), texture.height(), flip_horizontaly,
sub_rect, texture.width(), texture.height(), flip_horizontally,
&transform_mat);
glUniformMatrix4fv(matrix_id_, 1, GL_FALSE, transform_mat.data());
} else {
GetRotatedSubRectToRectTransformMatrix(sub_rect, texture.width(),
texture.height(), flip_horizontaly,
&transform_mat);
texture.height(),
flip_horizontally, &transform_mat);
glUniformMatrix4fv(matrix_id_, 1, GL_TRUE, transform_mat.data());
}
@ -304,6 +304,7 @@ class GlProcessor : public ImageToTensorConverter {
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
glFlush();
return absl::OkStatus();
}

View File

@ -179,13 +179,13 @@ class SubRectExtractorMetal {
}
absl::Status Execute(id<MTLTexture> input_texture,
const RotatedRect& sub_rect, bool flip_horizontaly,
const RotatedRect& sub_rect, bool flip_horizontally,
float alpha, float beta,
const tflite::gpu::HW& destination_size,
id<MTLCommandBuffer> command_buffer,
id<MTLBuffer> destination) {
auto output_texture = MTLTextureWithBuffer(destination_size, destination);
return InternalExecute(input_texture, sub_rect, flip_horizontaly, alpha,
return InternalExecute(input_texture, sub_rect, flip_horizontally, alpha,
beta, destination_size, command_buffer,
output_texture);
}
@ -211,7 +211,7 @@ class SubRectExtractorMetal {
absl::Status InternalExecute(id<MTLTexture> input_texture,
const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha, float beta,
bool flip_horizontally, float alpha, float beta,
const tflite::gpu::HW& destination_size,
id<MTLCommandBuffer> command_buffer,
id<MTLTexture> output_texture) {
@ -223,7 +223,7 @@ class SubRectExtractorMetal {
std::array<float, 16> transform_mat;
GetRotatedSubRectToRectTransformMatrix(sub_rect, input_texture.width,
input_texture.height,
flip_horizontaly, &transform_mat);
flip_horizontally, &transform_mat);
id<MTLBuffer> transform_mat_buffer =
[device_ newBufferWithBytes:&transform_mat
length:sizeof(transform_mat)
@ -345,9 +345,9 @@ class MetalProcessor : public ImageToTensorConverter {
absl::Status Init(CalculatorContext* cc, BorderMode border_mode) {
metal_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(metal_helper_);
ASSIGN_OR_RETURN(extractor_, SubRectExtractorMetal::Make(
metal_helper_.mtlDevice,
OutputFormat::kF32C4, border_mode));
MP_ASSIGN_OR_RETURN(extractor_, SubRectExtractorMetal::Make(
metal_helper_.mtlDevice,
OutputFormat::kF32C4, border_mode));
return absl::OkStatus();
}
@ -373,7 +373,7 @@ class MetalProcessor : public ImageToTensorConverter {
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 1.0f;
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
@ -383,7 +383,7 @@ class MetalProcessor : public ImageToTensorConverter {
MtlBufferView::GetWriteView(output_tensor, command_buffer);
MP_RETURN_IF_ERROR(extractor_->Execute(
texture, roi,
/*flip_horizontaly=*/false, transform.scale, transform.offset,
/*flip_horizontally=*/false, transform.scale, transform.offset,
tflite::gpu::HW(output_shape.dims[1], output_shape.dims[2]),
command_buffer, buffer_view.buffer()));
[command_buffer commit];

View File

@ -159,7 +159,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 255.0f;
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));

View File

@ -92,7 +92,7 @@ absl::StatusOr<ValueTransformation> GetValueRangeTransformation(
void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
int rect_width, int rect_height,
bool flip_horizontaly,
bool flip_horizontally,
std::array<float, 16>* matrix_ptr) {
std::array<float, 16>& matrix = *matrix_ptr;
// The resulting matrix is multiplication of below commented out matrices:
@ -118,7 +118,7 @@ void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
// {0.0f, 0.0f, a, 0.0f}
// {0.0f, 0.0f, 0.0f, 1.0f}
const float flip = flip_horizontaly ? -1 : 1;
const float flip = flip_horizontally ? -1 : 1;
// Matrix for optional horizontal flip around middle of output image.
// { fl , 0.0f, 0.0f, 0.0f}
// { 0.0f, 1.0f, 0.0f, 0.0f}
@ -177,13 +177,13 @@ void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
void GetTransposedRotatedSubRectToRectTransformMatrix(
const RotatedRect& sub_rect, int rect_width, int rect_height,
bool flip_horizontaly, std::array<float, 16>* matrix_ptr) {
bool flip_horizontally, std::array<float, 16>* matrix_ptr) {
std::array<float, 16>& matrix = *matrix_ptr;
// See comments in GetRotatedSubRectToRectTransformMatrix for detailed
// calculations.
const float a = sub_rect.width;
const float b = sub_rect.height;
const float flip = flip_horizontaly ? -1 : 1;
const float flip = flip_horizontally ? -1 : 1;
const float c = std::cos(sub_rect.rotation);
const float d = std::sin(sub_rect.rotation);
const float e = sub_rect.center_x;

View File

@ -74,7 +74,7 @@ absl::StatusOr<std::array<float, 4>> PadRoi(int input_tensor_width,
// Represents a transformation of value which involves scaling and offsetting.
// To apply transformation:
// ValueTransformation transform = ...
// float transformed_value = transform.scale * value + transfrom.offset;
// float transformed_value = transform.scale * value + transform.offset;
struct ValueTransformation {
float scale;
float offset;
@ -99,11 +99,11 @@ absl::StatusOr<ValueTransformation> GetValueRangeTransformation(
// @sub_rect - rotated sub rect in absolute coordinates
// @rect_width - rect width
// @rect_height - rect height
// @flip_horizontaly - we need to flip the output buffer.
// @flip_horizontally - we need to flip the output buffer.
// @matrix - 4x4 matrix (array of 16 elements) to populate
void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
int rect_width, int rect_height,
bool flip_horizontaly,
bool flip_horizontally,
std::array<float, 16>* matrix);
// Returns the transpose of the matrix found with
@ -118,11 +118,11 @@ void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
// @sub_rect - rotated sub rect in absolute coordinates
// @rect_width - rect width
// @rect_height - rect height
// @flip_horizontaly - we need to flip the output buffer.
// @flip_horizontally - we need to flip the output buffer.
// @matrix - 4x4 matrix (array of 16 elements) to populate
void GetTransposedRotatedSubRectToRectTransformMatrix(
const RotatedRect& sub_rect, int rect_width, int rect_height,
bool flip_horizontaly, std::array<float, 16>* matrix);
bool flip_horizontally, std::array<float, 16>* matrix);
// Validates the output dimensions set in the option proto. The input option
// proto is expected to have to following fields:

View File

@ -60,7 +60,7 @@ absl::Status InferenceCalculatorCpuImpl::UpdateContract(
}
absl::Status InferenceCalculatorCpuImpl::Open(CalculatorContext* cc) {
ASSIGN_OR_RETURN(inference_runner_, CreateInferenceRunner(cc));
MP_ASSIGN_OR_RETURN(inference_runner_, CreateInferenceRunner(cc));
return absl::OkStatus();
}
@ -71,8 +71,8 @@ absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) {
const auto& input_tensors = *kInTensors(cc);
RET_CHECK(!input_tensors.empty());
ASSIGN_OR_RETURN(std::vector<Tensor> output_tensors,
inference_runner_->Run(cc, input_tensors));
MP_ASSIGN_OR_RETURN(std::vector<Tensor> output_tensors,
inference_runner_->Run(cc, input_tensors));
kOutTensors(cc).Send(std::move(output_tensors));
return absl::OkStatus();
}
@ -84,11 +84,11 @@ absl::Status InferenceCalculatorCpuImpl::Close(CalculatorContext* cc) {
absl::StatusOr<std::unique_ptr<InferenceRunner>>
InferenceCalculatorCpuImpl::CreateInferenceRunner(CalculatorContext* cc) {
ASSIGN_OR_RETURN(auto model_packet, GetModelAsPacket(cc));
ASSIGN_OR_RETURN(auto op_resolver_packet, GetOpResolverAsPacket(cc));
MP_ASSIGN_OR_RETURN(auto model_packet, GetModelAsPacket(cc));
MP_ASSIGN_OR_RETURN(auto op_resolver_packet, GetOpResolverAsPacket(cc));
const int interpreter_num_threads =
cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread();
ASSIGN_OR_RETURN(TfLiteDelegatePtr delegate, MaybeCreateDelegate(cc));
MP_ASSIGN_OR_RETURN(TfLiteDelegatePtr delegate, MaybeCreateDelegate(cc));
return CreateInferenceInterpreterDelegateRunner(
std::move(model_packet), std::move(op_resolver_packet),
std::move(delegate), interpreter_num_threads);

View File

@ -100,7 +100,7 @@ absl::Status InferenceCalculatorGlImpl::GpuInferenceRunner::Init(
absl::Status InferenceCalculatorGlImpl::GpuInferenceRunner::LoadModel(
CalculatorContext* cc) {
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
MP_ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
const auto& model = *model_packet_.Get();
if (kSideInOpResolver(cc).IsConnected()) {
const tflite::OpResolver& op_resolver = kSideInOpResolver(cc).Get();

View File

@ -170,7 +170,7 @@ absl::Status
InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::InitTFLiteGPURunner(
CalculatorContext* cc,
const mediapipe::InferenceCalculatorOptions::Delegate& delegate) {
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
MP_ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
const auto& model = *model_packet_.Get();
bool allow_precision_loss = delegate.gpu().allow_precision_loss();
@ -306,16 +306,16 @@ InferenceCalculatorGlAdvancedImpl::OnDiskCacheHelper::SaveGpuCaches(
tflite::gpu::TFLiteGPURunner* gpu_runner) const {
if (use_kernel_caching_) {
// Save kernel file.
ASSIGN_OR_RETURN(std::vector<uint8_t> kernel_cache,
gpu_runner->GetSerializedBinaryCache());
MP_ASSIGN_OR_RETURN(std::vector<uint8_t> kernel_cache,
gpu_runner->GetSerializedBinaryCache());
std::string cache_str(kernel_cache.begin(), kernel_cache.end());
MP_RETURN_IF_ERROR(
mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
}
if (use_serialized_model_) {
// Save serialized model file.
ASSIGN_OR_RETURN(std::vector<uint8_t> serialized_model_vec,
gpu_runner->GetSerializedModel());
MP_ASSIGN_OR_RETURN(std::vector<uint8_t> serialized_model_vec,
gpu_runner->GetSerializedModel());
absl::string_view serialized_model(
reinterpret_cast<char*>(serialized_model_vec.data()),
serialized_model_vec.size());
@ -412,8 +412,8 @@ absl::Status InferenceCalculatorGlAdvancedImpl::Process(CalculatorContext* cc) {
RET_CHECK(!input_tensors.empty());
auto output_tensors = absl::make_unique<std::vector<Tensor>>();
ASSIGN_OR_RETURN(*output_tensors,
gpu_inference_runner_->Process(cc, input_tensors));
MP_ASSIGN_OR_RETURN(*output_tensors,
gpu_inference_runner_->Process(cc, input_tensors));
kOutTensors(cc).Send(std::move(output_tensors));
return absl::OkStatus();

View File

@ -191,6 +191,11 @@ absl::Status InferenceCalculatorMetalImpl::Process(CalculatorContext* cc) {
[output_encoder endEncoding];
}
[command_buffer commit];
// The below call is found (manual testing) to resolve flickering issues for
// some use cases where multiple Metal calculators are involved.
// TODO: investigate and ensure proper synchronization
// (e.g. fences/barriers/events).
[command_buffer waitUntilScheduled];
kOutTensors(cc).Send(std::move(output_tensors));
return absl::OkStatus();
@ -208,9 +213,9 @@ absl::Status InferenceCalculatorMetalImpl::Close(CalculatorContext* cc) {
absl::Status InferenceCalculatorMetalImpl::InitInterpreter(
CalculatorContext* cc) {
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
MP_ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(cc));
const auto& model = *model_packet_.Get();
ASSIGN_OR_RETURN(auto op_resolver_packet, GetOpResolverAsPacket(cc));
MP_ASSIGN_OR_RETURN(auto op_resolver_packet, GetOpResolverAsPacket(cc));
const auto& op_resolver = op_resolver_packet.Get();
tflite::InterpreterBuilder interpreter_builder(model, op_resolver);
AddDelegate(cc, &interpreter_builder);

View File

@ -58,7 +58,7 @@ absl::Status InferenceCalculatorXnnpackImpl::UpdateContract(
}
absl::Status InferenceCalculatorXnnpackImpl::Open(CalculatorContext* cc) {
ASSIGN_OR_RETURN(inference_runner_, CreateInferenceRunner(cc));
MP_ASSIGN_OR_RETURN(inference_runner_, CreateInferenceRunner(cc));
return absl::OkStatus();
}
@ -69,8 +69,8 @@ absl::Status InferenceCalculatorXnnpackImpl::Process(CalculatorContext* cc) {
const auto& input_tensors = *kInTensors(cc);
RET_CHECK(!input_tensors.empty());
ASSIGN_OR_RETURN(std::vector<Tensor> output_tensors,
inference_runner_->Run(cc, input_tensors));
MP_ASSIGN_OR_RETURN(std::vector<Tensor> output_tensors,
inference_runner_->Run(cc, input_tensors));
kOutTensors(cc).Send(std::move(output_tensors));
return absl::OkStatus();
}
@ -82,11 +82,11 @@ absl::Status InferenceCalculatorXnnpackImpl::Close(CalculatorContext* cc) {
absl::StatusOr<std::unique_ptr<InferenceRunner>>
InferenceCalculatorXnnpackImpl::CreateInferenceRunner(CalculatorContext* cc) {
ASSIGN_OR_RETURN(auto model_packet, GetModelAsPacket(cc));
ASSIGN_OR_RETURN(auto op_resolver_packet, GetOpResolverAsPacket(cc));
MP_ASSIGN_OR_RETURN(auto model_packet, GetModelAsPacket(cc));
MP_ASSIGN_OR_RETURN(auto op_resolver_packet, GetOpResolverAsPacket(cc));
const int interpreter_num_threads =
cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread();
ASSIGN_OR_RETURN(TfLiteDelegatePtr delegate, CreateDelegate(cc));
MP_ASSIGN_OR_RETURN(TfLiteDelegatePtr delegate, CreateDelegate(cc));
return CreateInferenceInterpreterDelegateRunner(
std::move(model_packet), std::move(op_resolver_packet),
std::move(delegate), interpreter_num_threads);

View File

@ -106,7 +106,7 @@ absl::Status RegexPreprocessorCalculator::Open(CalculatorContext* cc) {
return absl::InvalidArgumentError("No tensor metadata found");
}
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
const auto* tokenizer_metadata,
metadata_extractor->FindFirstProcessUnit(
*tensor_metadata, tflite::ProcessUnitOptions_RegexTokenizerOptions));
@ -115,9 +115,9 @@ absl::Status RegexPreprocessorCalculator::Open(CalculatorContext* cc) {
}
const tflite::RegexTokenizerOptions* regex_tokenizer_options =
tokenizer_metadata->options_as<tflite::RegexTokenizerOptions>();
ASSIGN_OR_RETURN(tokenizer_,
tasks::text::tokenizers::CreateRegexTokenizerFromOptions(
regex_tokenizer_options, metadata_extractor));
MP_ASSIGN_OR_RETURN(tokenizer_,
tasks::text::tokenizers::CreateRegexTokenizerFromOptions(
regex_tokenizer_options, metadata_extractor));
const auto& options =
cc->Options<mediapipe::RegexPreprocessorCalculatorOptions>();

View File

@ -67,9 +67,10 @@ absl::StatusOr<std::vector<int>> RunRegexPreprocessorCalculator(
tool::AddVectorSink("tensors", &graph_config, &output_packets);
std::string model_buffer = tasks::core::LoadBinaryContent(kTestModelPath);
ASSIGN_OR_RETURN(std::unique_ptr<ModelMetadataExtractor> metadata_extractor,
ModelMetadataExtractor::CreateFromModelBuffer(
model_buffer.data(), model_buffer.size()));
MP_ASSIGN_OR_RETURN(
std::unique_ptr<ModelMetadataExtractor> metadata_extractor,
ModelMetadataExtractor::CreateFromModelBuffer(model_buffer.data(),
model_buffer.size()));
// Run the graph.
CalculatorGraph graph;
MP_RETURN_IF_ERROR(graph.Initialize(

View File

@ -14,6 +14,7 @@
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include "absl/log/absl_check.h"
@ -21,17 +22,22 @@
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/substitute.h"
#include "absl/types/optional.h"
#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensor_converter_cpu.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gpu_buffer.h"
#if MEDIAPIPE_METAL_ENABLED
#import <CoreVideo/CoreVideo.h>
@ -94,16 +100,13 @@ absl::StatusOr<bool> ShouldFlipVertically(
}
}
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
RowMajorMatrixXf;
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
ColMajorMatrixXf;
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kMatrixTag[] = "MATRIX";
constexpr std::pair<float, float> kDefaultOutputRange = {0.0f, 1.0f};
} // namespace
namespace mediapipe {
@ -156,10 +159,6 @@ class TensorConverterCalculator : public CalculatorBase {
private:
absl::Status InitGpu(CalculatorContext* cc);
absl::Status LoadOptions(CalculatorContext* cc, bool use_gpu);
template <class T>
absl::Status NormalizeImage(const ImageFrame& image_frame,
bool flip_vertically, float* tensor_ptr);
absl::Status CopyMatrixToTensor(const Matrix& matrix, float* tensor_ptr);
absl::Status ProcessCPU(CalculatorContext* cc);
absl::Status ProcessGPU(CalculatorContext* cc);
@ -279,46 +278,21 @@ absl::Status TensorConverterCalculator::ProcessCPU(CalculatorContext* cc) {
}
const auto& image_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
const int height = image_frame.Height();
const int width = image_frame.Width();
const int channels = image_frame.NumberOfChannels();
const int channels_preserved = std::min(channels, max_num_channels_);
const mediapipe::ImageFormat::Format format = image_frame.Format();
if (!(format == mediapipe::ImageFormat::SRGBA ||
format == mediapipe::ImageFormat::SRGB ||
format == mediapipe::ImageFormat::GRAY8 ||
format == mediapipe::ImageFormat::VEC32F1))
RET_CHECK_FAIL() << "Unsupported CPU input format.";
output_tensors->emplace_back(
Tensor::ElementType::kFloat32,
Tensor::Shape{1, height, width, channels_preserved});
auto cpu_view = output_tensors->back().GetCpuWriteView();
// Copy image data into tensor.
if (image_frame.ByteDepth() == 1) {
MP_RETURN_IF_ERROR(NormalizeImage<uint8_t>(image_frame, flip_vertically_,
cpu_view.buffer<float>()));
} else if (image_frame.ByteDepth() == 4) {
MP_RETURN_IF_ERROR(NormalizeImage<float>(image_frame, flip_vertically_,
cpu_view.buffer<float>()));
} else {
return absl::InternalError(
"Only byte-based (8 bit) and float (32 bit) images supported.");
}
MP_ASSIGN_OR_RETURN(Tensor output,
ConvertImageFrameToTensorOnCpu(
image_frame,
output_range_.has_value() ? output_range_.value()
: kDefaultOutputRange,
flip_vertically_, max_num_channels_));
output_tensors->emplace_back(std::move(output));
} else if (cc->Inputs().HasTag(kMatrixTag)) {
if (cc->Inputs().Tag(kMatrixTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& matrix = cc->Inputs().Tag(kMatrixTag).Get<Matrix>();
const int height = matrix.rows();
const int width = matrix.cols();
const int channels = 1;
output_tensors->emplace_back(Tensor::ElementType::kFloat32,
Tensor::Shape{1, height, width, channels});
MP_RETURN_IF_ERROR(CopyMatrixToTensor(
matrix, output_tensors->back().GetCpuWriteView().buffer<float>()));
MP_ASSIGN_OR_RETURN(Tensor output,
ConvertMatrixToTensorOnCpu(matrix, row_major_matrix_));
output_tensors->emplace_back(std::move(output));
} else {
return absl::OkStatus();
}
@ -406,6 +380,7 @@ absl::Status TensorConverterCalculator::ProcessGPU(CalculatorContext* cc) {
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
glFlush();
src.Release();
return absl::OkStatus();
}));
@ -655,7 +630,7 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc,
}
// Get y-flip mode.
ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options, use_gpu));
MP_ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options, use_gpu));
// Get row_major_matrix mode.
row_major_matrix_ = options.row_major_matrix();
@ -668,67 +643,4 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc,
return absl::OkStatus();
}
template <class T>
absl::Status TensorConverterCalculator::NormalizeImage(
const ImageFrame& image_frame, bool flip_vertically, float* tensor_ptr) {
const int height = image_frame.Height();
const int width = image_frame.Width();
const int channels = image_frame.NumberOfChannels();
const int channels_preserved = std::min(channels, max_num_channels_);
const int channels_ignored = channels - channels_preserved;
if (output_range_.has_value()) {
// If the output float range is set and we are not using custom
// normalization, normalize the pixel values from [0, 255] to the specified
// output range.
RET_CHECK_NE(output_range_->first, output_range_->second);
const float scale = (output_range_->second - output_range_->first) / 255.0f;
const float bias = output_range_->first;
for (int i = 0; i < height; ++i) {
const T* image_ptr = reinterpret_cast<const T*>(
image_frame.PixelData() +
(flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
for (int j = 0; j < width; ++j) {
for (int c = 0; c < channels_preserved; ++c) {
*tensor_ptr++ = *image_ptr++ * scale + bias;
}
image_ptr += channels_ignored;
}
}
} else {
// [0,1], scale only (bias == 0)
// Verified that there are no precision issues with 1.0f / 255.0f expression
const float scale = 1.0f / 255.0f;
for (int i = 0; i < height; ++i) {
const T* image_ptr = reinterpret_cast<const T*>(
image_frame.PixelData() +
(flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
for (int j = 0; j < width; ++j) {
for (int c = 0; c < channels_preserved; ++c) {
*tensor_ptr++ = *image_ptr++ * scale;
}
image_ptr += channels_ignored;
}
}
}
return absl::OkStatus();
}
absl::Status TensorConverterCalculator::CopyMatrixToTensor(const Matrix& matrix,
float* tensor_ptr) {
if (row_major_matrix_) {
auto matrix_map =
Eigen::Map<RowMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
matrix_map = matrix;
} else {
auto matrix_map =
Eigen::Map<ColMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
matrix_map = matrix;
}
return absl::OkStatus();
}
} // namespace mediapipe

View File

@ -32,7 +32,7 @@ message TensorConverterCalculatorOptions {
// Custom settings to override the internal scaling factors `div` and `sub`.
// Both values must be set to non-negative values. Will only take effect on
// CPU AND when |use_custom_normalization| is set to true. When these custom
// values take effect, the |zero_center| setting above will be overriden, and
// values take effect, the |zero_center| setting above will be overridden, and
// the normalized_value will be calculated as:
// normalized_value = input / custom_div - custom_sub.
optional bool use_custom_normalization = 6 [default = false];

View File

@ -321,6 +321,61 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
}
}
// Verifies that with zero_center disabled and no explicit output range, a
// GRAY8 input pixel is normalized into the default [0, 1] range (value/255).
TEST_F(TensorConverterCalculatorTest,
       ShouldConvertImageWithDefaultOutputRange) {
  CalculatorGraph graph;
  CalculatorGraphConfig graph_config =
      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
          R"pb(
            input_stream: "input_image"
            node {
              calculator: "TensorConverterCalculator"
              input_stream: "IMAGE:input_image"
              output_stream: "TENSORS:tensor"
              options {
                [mediapipe.TensorConverterCalculatorOptions.ext] {
                  zero_center: false
                }
              }
            }
          )pb");
  std::vector<Packet> output_packets;
  tool::AddVectorSink("tensor", &graph_config, &output_packets);

  // Run the graph with a single 1x1 GRAY8 pixel of value 200.
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));
  auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
  cv::Mat mat = mediapipe::formats::MatView(input_image.get());
  mat.at<uint8_t>(0, 0) = 200;
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "input_image", Adopt(input_image.release()).At(Timestamp(0))));

  // Wait until the calculator finishes processing.
  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_EQ(output_packets.size(), 1);

  // Get and process results.
  const std::vector<Tensor>& tensor_vec =
      output_packets[0].Get<std::vector<Tensor>>();
  ASSERT_EQ(tensor_vec.size(), 1);
  const Tensor* tensor = &tensor_vec[0];

  // Calculate the expected normalized value: pixel / 255 for [0, 1] range.
  float expected_value = 200.0 / 255.0;
  EXPECT_EQ(tensor->element_type(), Tensor::ElementType::kFloat32);
  auto view = tensor->GetCpuReadView();
  float actual_value = *view.buffer<float>();
  EXPECT_FLOAT_EQ(actual_value, expected_value);

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("input_image"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST_F(TensorConverterCalculatorTest, FlipVertically) {
CalculatorGraph graph;
CalculatorGraphConfig graph_config =

View File

@ -0,0 +1,145 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensor_converter_cpu.h"
#include <algorithm>
#include <cstdint>
#include <utility>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
namespace mediapipe {
namespace {
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
RowMajorMatrixXf;
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
ColMajorMatrixXf;
// Copies pixel data from `image_frame` into `tensor_ptr`, linearly mapping
// each channel value from [0, 255] into `output_range`. Rows are visited
// bottom-up when `flip_vertically` is set, and per-pixel channels beyond
// `max_num_channels` are skipped. `T` is the per-channel storage type
// (uint8_t or float).
template <class T>
absl::Status NormalizeImage(const ImageFrame& image_frame, bool flip_vertically,
                            const std::pair<float, float>& output_range,
                            int max_num_channels, float* tensor_ptr) {
  const int num_rows = image_frame.Height();
  const int num_cols = image_frame.Width();
  const int num_channels = image_frame.NumberOfChannels();
  const int channels_copied = std::min(num_channels, max_num_channels);
  const int channels_skipped = num_channels - channels_copied;
  // A degenerate range would collapse all outputs to a single value.
  RET_CHECK_NE(output_range.first, output_range.second);
  // Linear transform [0, 255] -> [output_range.first, output_range.second].
  const float scale = (output_range.second - output_range.first) / 255.0f;
  const float bias = output_range.first;
  for (int row = 0; row < num_rows; ++row) {
    const int src_row = flip_vertically ? num_rows - 1 - row : row;
    const T* pixel_ptr = reinterpret_cast<const T*>(
        image_frame.PixelData() + src_row * image_frame.WidthStep());
    for (int col = 0; col < num_cols; ++col) {
      for (int channel = 0; channel < channels_copied; ++channel) {
        *tensor_ptr++ = *pixel_ptr++ * scale + bias;
      }
      pixel_ptr += channels_skipped;
    }
  }
  return absl::OkStatus();
}
} // namespace
// Normalizes an 8-bit (uint8 channel) image into `tensor_ptr`. Thin wrapper
// over NormalizeImage<uint8_t>; see that function for the [0, 255] ->
// `output_range` mapping semantics.
absl::Status NormalizeUInt8Image(const ImageFrame& image_frame,
                                 bool flip_vertically,
                                 const std::pair<float, float>& output_range,
                                 int max_num_channels, float* tensor_ptr) {
  return NormalizeImage<uint8_t>(image_frame, flip_vertically, output_range,
                                 max_num_channels, tensor_ptr);
}
// Normalizes a float32-channel image into `tensor_ptr`. Thin wrapper over
// NormalizeImage<float>; see that function for the [0, 255] ->
// `output_range` mapping semantics.
absl::Status NormalizeFloatImage(const ImageFrame& image_frame,
                                 bool flip_vertically,
                                 const std::pair<float, float>& output_range,
                                 int max_num_channels, float* tensor_ptr) {
  return NormalizeImage<float>(image_frame, flip_vertically, output_range,
                               max_num_channels, tensor_ptr);
}
// Writes the coefficients of `matrix` into the float buffer `tensor_ptr`,
// using row-major or column-major memory layout as requested. The buffer must
// hold at least rows() * cols() floats.
absl::Status CopyMatrixToTensor(const Matrix& matrix, bool is_row_major_matrix,
                                float* tensor_ptr) {
  const int num_rows = matrix.rows();
  const int num_cols = matrix.cols();
  // Assigning through an Eigen::Map lets Eigen perform the layout conversion.
  if (is_row_major_matrix) {
    Eigen::Map<RowMajorMatrixXf>(tensor_ptr, num_rows, num_cols) = matrix;
  } else {
    Eigen::Map<ColMajorMatrixXf>(tensor_ptr, num_rows, num_cols) = matrix;
  }
  return absl::OkStatus();
}
// Converts `image_frame` into a float32 Tensor of shape
// {1, height, width, min(channels, max_num_channels)}, normalizing pixel
// values into `output_range` (optionally flipping the image vertically).
// Fails for unsupported formats or byte depths other than 1 (uint8) and
// 4 (float32).
absl::StatusOr<Tensor> ConvertImageFrameToTensorOnCpu(
    const ImageFrame& image_frame, const std::pair<float, float>& output_range,
    bool flip_vertically, int max_num_channels) {
  const mediapipe::ImageFormat::Format format = image_frame.Format();
  const bool is_supported_format = format == mediapipe::ImageFormat::SRGBA ||
                                   format == mediapipe::ImageFormat::SRGB ||
                                   format == mediapipe::ImageFormat::GRAY8 ||
                                   format == mediapipe::ImageFormat::VEC32F1;
  if (!is_supported_format) {
    RET_CHECK_FAIL() << "Unsupported CPU input format.";
  }

  const int num_output_channels =
      std::min(image_frame.NumberOfChannels(), max_num_channels);
  Tensor output_tensor(
      Tensor::ElementType::kFloat32,
      Tensor::Shape{1, image_frame.Height(), image_frame.Width(),
                    num_output_channels});
  auto cpu_view = output_tensor.GetCpuWriteView();
  float* tensor_buffer = cpu_view.buffer<float>();

  // Dispatch on per-channel storage size: 1 byte -> uint8, 4 bytes -> float32.
  switch (image_frame.ByteDepth()) {
    case 1:
      MP_RETURN_IF_ERROR(NormalizeUInt8Image(image_frame, flip_vertically,
                                             output_range, max_num_channels,
                                             tensor_buffer));
      break;
    case 4:
      MP_RETURN_IF_ERROR(NormalizeFloatImage(image_frame, flip_vertically,
                                             output_range, max_num_channels,
                                             tensor_buffer));
      break;
    default:
      return absl::InternalError(
          "Only byte-based (8 bit) and float (32 bit) images supported.");
  }
  return output_tensor;
}
// Converts `matrix` into a single-channel float32 Tensor of shape
// {1, rows, cols, 1}. `row_major_matrix` selects the memory layout used when
// copying the coefficients into the tensor.
absl::StatusOr<Tensor> ConvertMatrixToTensorOnCpu(const Matrix& matrix,
                                                  bool row_major_matrix) {
  const int num_rows = matrix.rows();
  const int num_cols = matrix.cols();
  Tensor output_tensor(Tensor::ElementType::kFloat32,
                       Tensor::Shape{1, num_rows, num_cols, 1});
  float* tensor_buffer = output_tensor.GetCpuWriteView().buffer<float>();
  MP_RETURN_IF_ERROR(
      CopyMatrixToTensor(matrix, row_major_matrix, tensor_buffer));
  return output_tensor;
}
} // namespace mediapipe

View File

@ -0,0 +1,61 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_CONVERTER_CPU_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_CONVERTER_CPU_H_
#include <utility>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
namespace mediapipe {
// Converts an ImageFrame to a Tensor on the CPU.
// @output_range defines the (min, max) range the [0, 255] pixel values are
// linearly mapped to.
// @flip_vertically flips the image vertically during conversion.
// @max_num_channels limits the number of channels copied into the output
// tensor.
// Returns the output Tensor.
absl::StatusOr<Tensor> ConvertImageFrameToTensorOnCpu(
const ImageFrame& image_frame, const std::pair<float, float>& output_range,
bool flip_vertically, int max_num_channels);
// Converts a Matrix to a Tensor on the CPU.
// @row_major_matrix defines whether the matrix coefficients are written to
// the output tensor in row-major (true) or column-major (false) order.
// Returns the output Tensor.
absl::StatusOr<Tensor> ConvertMatrixToTensorOnCpu(const Matrix& matrix,
bool row_major_matrix);
// For testing only below.
absl::Status NormalizeUInt8Image(const ImageFrame& image_frame,
bool flip_vertically,
const std::pair<float, float>& output_range,
int max_num_channels, float* tensor_ptr);
absl::Status NormalizeFloatImage(const ImageFrame& image_frame,
bool flip_vertically,
const std::pair<float, float>& output_range,
int max_num_channels, float* tensor_ptr);
absl::Status CopyMatrixToTensor(const Matrix& matrix, bool is_row_major_matrix,
float* tensor_ptr);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_CONVERTER_CPU_H_

View File

@ -0,0 +1,175 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensor_converter_cpu.h"
#include <cstdint>
#include <utility>
#include <vector>
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/util/image_test_utils.h"
namespace mediapipe {
namespace {
// Builds a num_rows x num_columns matrix whose entry (r, c) equals its
// row-major linear index, r * num_columns + c.
Matrix CreateTestMatrix(int num_rows, int num_columns) {
  Matrix matrix(num_rows, num_columns);
  int next_value = 0;
  for (int row = 0; row < num_rows; ++row) {
    for (int column = 0; column < num_columns; ++column) {
      matrix(row, column) = next_value++;
    }
  }
  return matrix;
}
// Verifies that CopyMatrixToTensor writes values in row-major order: flat
// index i maps to (row = i / cols, col = i % cols).
TEST(TensorConverterCpuTest, ShouldCopyMatrixInRowMajorFormatToTensor) {
  auto test_matrix = CreateTestMatrix(/*num_rows=*/3, /*num_columns=*/4);
  std::vector<float> tensor_data(test_matrix.size(), 0.0f);
  MP_EXPECT_OK(CopyMatrixToTensor(test_matrix, /*is_row_major_matrix=*/true,
                                  tensor_data.data()));
  for (int i = 0; i < tensor_data.size(); ++i) {
    const int row = i / test_matrix.cols();
    const int column = i % test_matrix.cols();
    EXPECT_FLOAT_EQ(tensor_data[i], (test_matrix)(row, column));
  }
}
// Verifies that CopyMatrixToTensor writes values in column-major order: flat
// index i maps to (row = i % rows, col = i / rows).
TEST(TensorConverterCpuTest, ShouldCopyMatrixInColumnMajorFormatToTensor) {
  auto test_matrix = CreateTestMatrix(/*num_rows=*/3, /*num_columns=*/4);
  std::vector<float> tensor_data(test_matrix.size(), 0.0f);
  MP_EXPECT_OK(CopyMatrixToTensor(test_matrix, /*is_row_major_matrix=*/false,
                                  tensor_data.data()));
  for (int i = 0; i < tensor_data.size(); ++i) {
    const int row = i % test_matrix.rows();
    const int column = i / test_matrix.rows();
    EXPECT_FLOAT_EQ(tensor_data[i], (test_matrix)(row, column));
  }
}
// Verifies that a GRAY8 image normalized with range [0, 1] yields
// pixel / 255 per element.
TEST(TensorConverterCpuTest, ShouldNormalizeGrey8ImageWithDefaultRange) {
  auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4);
  std::vector<float> tensor_data(
      grey8_image_frame.Width() * grey8_image_frame.Height(), 0.0f);
  MP_EXPECT_OK(NormalizeUInt8Image(grey8_image_frame, /*flip_vertically=*/false,
                                   {0.0f, 1.0f}, /*max_num_channels=*/1,
                                   tensor_data.data()));
  for (int i = 0; i < tensor_data.size(); ++i) {
    EXPECT_FLOAT_EQ(
        tensor_data[i],
        static_cast<uint8_t>(grey8_image_frame.PixelData()[i]) / 255.0f);
  }
}
// Verifies the linear mapping for a custom output range:
// pixel / 255 * (max - min) + min.
TEST(TensorConverterCpuTest, ShouldNormalizeGrey8ImageWithSpecifiedRange) {
  auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4);
  std::vector<float> tensor_data(
      grey8_image_frame.Width() * grey8_image_frame.Height(), 0.0f);
  const auto range = std::make_pair(2.0f, 3.0f);
  MP_EXPECT_OK(
      NormalizeUInt8Image(grey8_image_frame, /*flip_vertically=*/false, range,
                          /*max_num_channels=*/1, tensor_data.data()));
  for (int i = 0; i < tensor_data.size(); ++i) {
    EXPECT_FLOAT_EQ(tensor_data[i],
                    static_cast<uint8_t>(grey8_image_frame.PixelData()[i]) /
                            255.0f * (range.second - range.first) +
                        range.first);
  }
}
// Verifies vertical flipping: source pixel at row y lands at row
// (height - 1 - y) in the output buffer.
TEST(TensorConverterCpuTest, ShouldNormalizeGrey8ImageFlipped) {
  auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4);
  std::vector<float> tensor_data(
      grey8_image_frame.Width() * grey8_image_frame.Height(), 0.0f);
  MP_EXPECT_OK(NormalizeUInt8Image(grey8_image_frame, /*flip_vertically=*/true,
                                   {0.0f, 1.0f}, /*max_num_channels=*/1,
                                   tensor_data.data()));
  for (int i = 0; i < tensor_data.size(); ++i) {
    const int x = i % grey8_image_frame.Width();
    const int y = i / grey8_image_frame.Width();
    const int flipped_y = grey8_image_frame.Height() - y - 1;
    const int index = flipped_y * grey8_image_frame.Width() + x;
    EXPECT_FLOAT_EQ(
        tensor_data[index],
        static_cast<uint8_t>(grey8_image_frame.PixelData()[i]) / 255.0f);
  }
}
// Verifies that a float32 image normalized with range [0, 1] yields
// value / 255 per element (float inputs go through the same 255-based scale).
TEST(TensorConverterCpuTest, ShouldNormalizeFloatImageWithDefaultRange) {
  auto float_image_frame =
      CreateTestFloat32ImageFrame(/*width=*/3, /*height=*/4);
  std::vector<float> tensor_data(
      float_image_frame.Width() * float_image_frame.Height(), 0.0f);
  MP_EXPECT_OK(NormalizeFloatImage(float_image_frame, /*flip_vertically=*/false,
                                   {0.0f, 1.0f}, /*max_num_channels=*/1,
                                   tensor_data.data()));
  for (int i = 0; i < tensor_data.size(); ++i) {
    EXPECT_FLOAT_EQ(tensor_data[i], reinterpret_cast<const float*>(
                                        float_image_frame.PixelData())[i] /
                                        255.0f);
  }
}
// End-to-end check: converting a GRAY8 frame produces a float tensor whose
// elements equal pixel / 255.
TEST(TensorConverterCpuTest, ConvertImageFrameToTensorOnCpu) {
  auto grey8_image_frame = CreateTestGrey8ImageFrame(/*width=*/3, /*height=*/4);
  MP_ASSERT_OK_AND_ASSIGN(Tensor output, ConvertImageFrameToTensorOnCpu(
                                             grey8_image_frame, {0.0f, 1.0f},
                                             /*flip_vertically=*/false,
                                             /*max_num_channels=*/1));
  const auto cpu_read_view = output.GetCpuReadView();
  const float* tensor_ptr = cpu_read_view.buffer<float>();
  for (int i = 0; i < grey8_image_frame.Width() * grey8_image_frame.Height();
       ++i) {
    EXPECT_FLOAT_EQ(
        tensor_ptr[i],
        static_cast<uint8_t>(grey8_image_frame.PixelData()[i]) / 255.0);
  }
}
// End-to-end check: a column-major conversion matches Eigen's default
// (column-major) coefficient storage, so the tensor equals matrix.data().
TEST(TensorConverterCpuTest, ConvertMatrixToTensorOnCpu) {
  auto test_matrix = CreateTestMatrix(/*num_rows=*/3, /*num_columns=*/4);
  MP_ASSERT_OK_AND_ASSIGN(
      Tensor output, ConvertMatrixToTensorOnCpu(test_matrix,
                                                /*row_major_matrix=*/false));
  const auto cpu_read_view = output.GetCpuReadView();
  const float* tensor_ptr = cpu_read_view.buffer<float>();
  for (int i = 0; i < test_matrix.size(); ++i) {
    EXPECT_FLOAT_EQ(tensor_ptr[i], test_matrix.data()[i]);
  }
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,84 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensor_to_joints_calculator.h"
#include <utility>
#include "mediapipe/calculators/tensor/tensor_to_joints_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/body_rig.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace api2 {
namespace {
// Number of values in 6D representation of rotation.
constexpr int kRotation6dSize = 6;
} // namespace
// Implementation of TensorToJointsCalculator: decodes 6D rotation values
// from the input tensor into a JointList, leaving joint visibility unset.
class TensorToJointsCalculatorImpl
    : public mediapipe::api2::NodeImpl<TensorToJointsCalculator> {
 public:
  absl::Status Open(CalculatorContext* cc) override {
    const auto& options = cc->Options<TensorToJointsCalculatorOptions>();

    // Get number of joints.
    RET_CHECK_GE(options.num_joints(), 0);
    num_joints_ = options.num_joints();

    // Get start index.
    start_index_ = options.start_index();

    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    // Skip if Tensor is empty.
    if (kInTensor(cc).IsEmpty()) {
      return absl::OkStatus();
    }

    // Get raw floats from the Tensor. The tensor must hold exactly
    // `start_index_` leading values followed by six rotation values per joint.
    const Tensor& tensor = kInTensor(cc).Get();
    RET_CHECK_EQ(tensor.shape().num_elements(),
                 num_joints_ * kRotation6dSize + start_index_)
        << "Unexpected number of values in Tensor";
    const float* raw_floats = tensor.GetCpuReadView().buffer<float>();

    // Convert raw floats into Joint rotations.
    JointList joints;
    for (int joint_idx = 0; joint_idx < num_joints_; ++joint_idx) {
      Joint* joint = joints.add_joint();
      for (int idx_6d = 0; idx_6d < kRotation6dSize; ++idx_6d) {
        joint->add_rotation_6d(
            raw_floats[start_index_ + joint_idx * kRotation6dSize + idx_6d]);
      }
    }

    kOutJoints(cc).Send(std::move(joints));
    return absl::OkStatus();
  }

 private:
  // Number of joints to decode, from options.num_joints().
  int num_joints_ = 0;
  // Offset of the first rotation value within the tensor, from
  // options.start_index().
  int start_index_ = 0;
};
MEDIAPIPE_NODE_IMPLEMENTATION(TensorToJointsCalculatorImpl);
} // namespace api2
} // namespace mediapipe

View File

@ -0,0 +1,64 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_TO_JOINTS_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_TO_JOINTS_CALCULATOR_H_
#include <memory>
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/body_rig.pb.h"
#include "mediapipe/framework/formats/tensor.h"
namespace mediapipe {
namespace api2 {
// A calculator to convert Tensors to JointList.
//
// Calculator fills in only rotation of the joints leaving visibility undefined.
//
// Input:
// TENSOR - std::vector<Tensor> with kFloat32 values
// Vector of tensors to be converted to joints. Only the first tensor will
// be used. Number of values is expected to be multiple of six.
//
// Output:
// JOINTS - JointList
// List of joints with rotations extracted from given tensor and undefined
// visibility.
//
// Example:
// node {
// calculator: "TensorToJointsCalculator"
// input_stream: "TENSOR:tensor"
// output_stream: "JOINTS:joints"
// options: {
// [mediapipe.TensorToJointsCalculatorOptions.ext] {
// num_joints: 56
// start_index: 3
// }
// }
// }
class TensorToJointsCalculator : public NodeIntf {
 public:
  // Input tensor expected to hold `6 * num_joints + start_index` floats.
  static constexpr Input<mediapipe::Tensor> kInTensor{"TENSOR"};
  // Output list of joints with 6D rotations and undefined visibility.
  static constexpr Output<mediapipe::JointList> kOutJoints{"JOINTS"};
  MEDIAPIPE_NODE_INTERFACE(TensorToJointsCalculator, kInTensor, kOutJoints);
};
} // namespace api2
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSOR_TO_JOINTS_CALCULATOR_H_

View File

@ -0,0 +1,32 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
// Options for TensorToJointsCalculator, which converts a float tensor into a
// JointList of 6D rotations.
message TensorToJointsCalculatorOptions {
  extend CalculatorOptions {
    optional TensorToJointsCalculatorOptions ext = 406440177;
  }

  // Number of joints from the output of the model. Calculator will expect the
  // tensor to contain `6 * num_joints + start_index` values.
  optional int32 num_joints = 1;

  // Index to start reading 6 value blocks from. Values before this index are
  // ignored.
  optional int32 start_index = 2 [default = 0];
}

View File

@ -0,0 +1,123 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/body_rig.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/timestamp.h"
namespace mediapipe {
namespace api2 {
namespace {
using Node = ::mediapipe::CalculatorGraphConfig::Node;
// Parameters for one TensorToJointsCalculator test case.
struct TensorToJointsTestCase {
  // Human-readable case name used by the test instantiation.
  std::string test_name;
  // Value for the calculator's `num_joints` option.
  int num_joints;
  // Value for the calculator's `start_index` option.
  int start_index;
  // Flat float values written into the input tensor.
  std::vector<float> raw_values;
  // Expected 6D rotation values, one inner vector per joint.
  std::vector<std::vector<float>> expected_rotations;
};
using TensorToJointsTest = ::testing::TestWithParam<TensorToJointsTestCase>;
// Runs TensorToJointsCalculator on the test-case tensor and verifies the
// emitted JointList rotations match `expected_rotations` with visibility
// left unset.
TEST_P(TensorToJointsTest, TensorToJointsTest) {
  const TensorToJointsTestCase& tc = GetParam();

  // Prepare graph.
  mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(absl::Substitute(
      R"(
      calculator: "TensorToJointsCalculator"
      input_stream: "TENSOR:tensor"
      output_stream: "JOINTS:joints"
      options: {
        [mediapipe.TensorToJointsCalculatorOptions.ext] {
          num_joints: $0
          start_index: $1
        }
      }
      )",
      tc.num_joints, tc.start_index)));

  // Prepare tensor.
  Tensor tensor(Tensor::ElementType::kFloat32,
                Tensor::Shape{1, 1, static_cast<int>(tc.raw_values.size()), 1});
  float* tensor_buffer = tensor.GetCpuWriteView().buffer<float>();
  ASSERT_NE(tensor_buffer, nullptr);
  for (int i = 0; i < tc.raw_values.size(); ++i) {
    tensor_buffer[i] = tc.raw_values[i];
  }

  // Send tensor to the graph.
  runner.MutableInputs()->Tag("TENSOR").packets.push_back(
      mediapipe::MakePacket<Tensor>(std::move(tensor)).At(Timestamp(0)));

  // Run the graph.
  MP_ASSERT_OK(runner.Run());

  const auto& output_packets = runner.Outputs().Tag("JOINTS").packets;
  EXPECT_EQ(1, output_packets.size());
  const auto& joints = output_packets[0].Get<JointList>();
  EXPECT_EQ(joints.joint_size(), tc.expected_rotations.size());
  for (int i = 0; i < joints.joint_size(); ++i) {
    const Joint& joint = joints.joint(i);
    std::vector<float> expected_rotation_6d = tc.expected_rotations[i];
    EXPECT_EQ(joint.rotation_6d_size(), expected_rotation_6d.size())
        << "Unexpected joint #" << i << " rotation";
    for (int j = 0; j < joint.rotation_6d_size(); ++j) {
      EXPECT_EQ(joint.rotation_6d(j), expected_rotation_6d[j])
          << "Unexpected joint #" << i << " rotation";
    }
    // Calculator leaves visibility undefined.
    EXPECT_FALSE(joint.has_visibility());
  }
}
// Instantiates the suite with empty, single-joint, and two-joint cases.
// Each raw-value vector begins with 3 values that are skipped via
// start_index = 3; the suffix of each test name comes from
// TensorToJointsTestCase::test_name.
INSTANTIATE_TEST_SUITE_P(
TensorToJointsTests, TensorToJointsTest,
testing::ValuesIn<TensorToJointsTestCase>({
{"Empty", 0, 3, {0, 0, 0}, {}},
{"Single",
1,
3,
{0, 0, 0, 10, 11, 12, 13, 14, 15},
{{10, 11, 12, 13, 14, 15}}},
{"Double",
2,
3,
{0, 0, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21},
{{10, 11, 12, 13, 14, 15}, {16, 17, 18, 19, 20, 21}}},
}),
[](const testing::TestParamInfo<TensorToJointsTest::ParamType>& info) {
return info.param.test_name;
});
} // namespace
} // namespace api2
} // namespace mediapipe

View File

@ -110,8 +110,8 @@ absl::Status TensorsToClassificationCalculator::Open(CalculatorContext* cc) {
sort_by_descending_score_ = options.sort_by_descending_score();
if (options.has_label_map_path()) {
std::string string_path;
ASSIGN_OR_RETURN(string_path,
PathToResourceAsFile(options.label_map_path()));
MP_ASSIGN_OR_RETURN(string_path,
PathToResourceAsFile(options.label_map_path()));
std::string label_map_string;
MP_RETURN_IF_ERROR(
mediapipe::GetResourceContents(string_path, &label_map_string));

View File

@ -34,7 +34,7 @@ message TensorsToClassificationCalculatorOptions {
repeated Entry entries = 1;
}
// Score threshold for perserving the class.
// Score threshold for preserving the class.
optional float min_score_threshold = 1;
// Number of highest scoring labels to output. If top_k is not positive then
// all labels are used.

View File

@ -15,7 +15,6 @@
#include <unordered_map>
#include <vector>
#include "absl/log/absl_log.h"
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "mediapipe/calculators/tensor/tensors_to_detections_calculator.pb.h"
@ -147,7 +146,7 @@ BoxFormat GetBoxFormat(const TensorsToDetectionsCalculatorOptions& options) {
// TENSORS - Vector of Tensors of type kFloat32. The vector of tensors can have
// 2 or 3 tensors. First tensor is the predicted raw boxes/keypoints.
// The size of the values must be (num_boxes * num_predicted_values).
// Second tensor is the score tensor. The size of the valuse must be
// Second tensor is the score tensor. The size of the values must be
// (num_boxes * num_classes). It's optional to pass in a third tensor
// for anchors (e.g. for SSD models) depend on the outputs of the
// detection model. The size of anchor tensor must be (num_boxes *
@ -215,7 +214,8 @@ class TensorsToDetectionsCalculator : public Node {
const int* detection_classes,
std::vector<Detection>* output_detections);
Detection ConvertToDetection(float box_ymin, float box_xmin, float box_ymax,
float box_xmax, float score, int class_id,
float box_xmax, absl::Span<const float> scores,
absl::Span<const int> class_ids,
bool flip_vertically);
bool IsClassIndexAllowed(int class_index);
@ -223,6 +223,7 @@ class TensorsToDetectionsCalculator : public Node {
int num_boxes_ = 0;
int num_coords_ = 0;
int max_results_ = -1;
int classes_per_detection_ = 1;
BoxFormat box_output_format_ =
mediapipe::TensorsToDetectionsCalculatorOptions::YXHW;
@ -266,7 +267,8 @@ absl::Status TensorsToDetectionsCalculator::UpdateContract(
CalculatorContract* cc) {
if (CanUseGpu()) {
#ifndef MEDIAPIPE_DISABLE_GL_COMPUTE
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(
cc, /*request_gpu_as_optional=*/true));
#elif MEDIAPIPE_METAL_ENABLED
MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#endif // !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
@ -280,7 +282,6 @@ absl::Status TensorsToDetectionsCalculator::Open(CalculatorContext* cc) {
if (CanUseGpu()) {
#ifndef MEDIAPIPE_DISABLE_GL_COMPUTE
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#elif MEDIAPIPE_METAL_ENABLED
gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(gpu_helper_);
@ -484,6 +485,16 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU(
auto num_boxes_view = num_boxes_tensor->GetCpuReadView();
auto num_boxes = num_boxes_view.buffer<float>();
num_boxes_ = num_boxes[0];
// The detection model with Detection_PostProcess op may output duplicate
// boxes with different classes, in the following format:
// num_boxes_tensor = [num_boxes]
// detection_classes_tensor = [box_1_class_1, box_1_class_2, ...]
// detection_scores_tensor = [box_1_score_1, box_1_score_2, ... ]
// detection_boxes_tensor = [box_1, box1, ... ]
// Each box repeats classes_per_detection_ times.
// Note Detection_PostProcess op is only supported in CPU.
RET_CHECK_EQ(max_detections % num_boxes_, 0);
classes_per_detection_ = max_detections / num_boxes_;
auto detection_boxes_view = detection_boxes_tensor->GetCpuReadView();
auto detection_boxes = detection_boxes_view.buffer<float>();
@ -493,8 +504,8 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU(
auto detection_classes_view = detection_classes_tensor->GetCpuReadView();
auto detection_classes_ptr = detection_classes_view.buffer<float>();
std::vector<int> detection_classes(num_boxes_);
for (int i = 0; i < num_boxes_; ++i) {
std::vector<int> detection_classes(num_boxes_ * classes_per_detection_);
for (int i = 0; i < detection_classes.size(); ++i) {
detection_classes[i] = static_cast<int>(detection_classes_ptr[i]);
}
MP_RETURN_IF_ERROR(ConvertToDetections(detection_boxes, detection_scores,
@ -676,13 +687,15 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
absl::Status TensorsToDetectionsCalculator::Close(CalculatorContext* cc) {
#ifndef MEDIAPIPE_DISABLE_GL_COMPUTE
gpu_helper_.RunInGlContext([this] {
decoded_boxes_buffer_ = nullptr;
scored_boxes_buffer_ = nullptr;
raw_anchors_buffer_ = nullptr;
glDeleteProgram(decode_program_);
glDeleteProgram(score_program_);
});
if (gpu_inited_) {
gpu_helper_.RunInGlContext([this] {
decoded_boxes_buffer_ = nullptr;
scored_boxes_buffer_ = nullptr;
raw_anchors_buffer_ = nullptr;
glDeleteProgram(decode_program_);
glDeleteProgram(score_program_);
});
}
#elif MEDIAPIPE_METAL_ENABLED
decoded_boxes_buffer_ = nullptr;
scored_boxes_buffer_ = nullptr;
@ -861,24 +874,25 @@ absl::Status TensorsToDetectionsCalculator::DecodeBoxes(
absl::Status TensorsToDetectionsCalculator::ConvertToDetections(
const float* detection_boxes, const float* detection_scores,
const int* detection_classes, std::vector<Detection>* output_detections) {
for (int i = 0; i < num_boxes_; ++i) {
for (int i = 0; i < num_boxes_ * classes_per_detection_;
i += classes_per_detection_) {
if (max_results_ > 0 && output_detections->size() == max_results_) {
break;
}
if (options_.has_min_score_thresh() &&
detection_scores[i] < options_.min_score_thresh()) {
continue;
}
if (!IsClassIndexAllowed(detection_classes[i])) {
continue;
}
const int box_offset = i * num_coords_;
Detection detection = ConvertToDetection(
/*box_ymin=*/detection_boxes[box_offset + box_indices_[0]],
/*box_xmin=*/detection_boxes[box_offset + box_indices_[1]],
/*box_ymax=*/detection_boxes[box_offset + box_indices_[2]],
/*box_xmax=*/detection_boxes[box_offset + box_indices_[3]],
detection_scores[i], detection_classes[i], options_.flip_vertically());
absl::MakeConstSpan(detection_scores + i, classes_per_detection_),
absl::MakeConstSpan(detection_classes + i, classes_per_detection_),
options_.flip_vertically());
// if all the scores and classes are filtered out, we skip the empty
// detection.
if (detection.score().empty()) {
continue;
}
const auto& bbox = detection.location_data().relative_bounding_box();
if (bbox.width() < 0 || bbox.height() < 0 || std::isnan(bbox.width()) ||
std::isnan(bbox.height())) {
@ -908,11 +922,21 @@ absl::Status TensorsToDetectionsCalculator::ConvertToDetections(
}
Detection TensorsToDetectionsCalculator::ConvertToDetection(
float box_ymin, float box_xmin, float box_ymax, float box_xmax, float score,
int class_id, bool flip_vertically) {
float box_ymin, float box_xmin, float box_ymax, float box_xmax,
absl::Span<const float> scores, absl::Span<const int> class_ids,
bool flip_vertically) {
Detection detection;
detection.add_score(score);
detection.add_label_id(class_id);
for (int i = 0; i < scores.size(); ++i) {
if (!IsClassIndexAllowed(class_ids[i])) {
continue;
}
if (options_.has_min_score_thresh() &&
scores[i] < options_.min_score_thresh()) {
continue;
}
detection.add_score(scores[i]);
detection.add_label_id(class_ids[i]);
}
LocationData* location_data = detection.mutable_location_data();
location_data->set_format(LocationData::RELATIVE_BOUNDING_BOX);
@ -942,6 +966,7 @@ absl::Status TensorsToDetectionsCalculator::GpuInit(CalculatorContext* cc) {
break;
}
#ifndef MEDIAPIPE_DISABLE_GL_COMPUTE
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, output_format_flag]()
-> absl::Status {
// A shader to decode detection boxes.
@ -1420,7 +1445,6 @@ kernel void scoreKernel(
num_classes_, max_wg_size));
}
}
#endif // !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
return absl::OkStatus();

View File

@ -75,7 +75,7 @@ message TensorsToDetectionsCalculatorOptions {
// representation has a bottom-left origin (e.g., in OpenGL).
optional bool flip_vertically = 18 [default = false];
// Score threshold for perserving decoded detections.
// Score threshold for preserving decoded detections.
optional float min_score_thresh = 19;
// The maximum number of the detection results to return. If < 0, all

View File

@ -124,7 +124,7 @@ absl::Status TensorsToLandmarksCalculator::Open(CalculatorContext* cc) {
kFlipVertically(cc).IsConnected())) {
RET_CHECK(options_.has_input_image_height() &&
options_.has_input_image_width())
<< "Must provide input width/height for using flipping when outputing "
<< "Must provide input width/height for using flipping when outputting "
"landmarks in absolute coordinates.";
}
return absl::OkStatus();

View File

@ -12,32 +12,35 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
#include "mediapipe/framework/calculator_context.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#include "mediapipe/util/resource_util.h"
#include "tensorflow/lite/interpreter.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/shader_util.h"
#endif // !MEDIAPIPE_DISABLE_GPU
#if !MEDIAPIPE_DISABLE_OPENCV
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h"
#endif // !MEDIAPIPE_DISABLE_OPENCV
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
@ -62,37 +65,9 @@ namespace {
constexpr int kWorkgroupSize = 8; // Block size for GPU shader.
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size) { // NOLINT
return (size + group_size - 1) / group_size;
}
bool CanUseGpu() {
#if !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
// TODO: Configure GPU usage policy in individual calculators.
constexpr bool kAllowGpuProcessing = true;
return kAllowGpuProcessing;
#else
return false;
#endif // !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
}
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kOutputSizeTag[] = "OUTPUT_SIZE";
constexpr char kMaskTag[] = "MASK";
absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
const std::vector<int>& dims) {
if (dims.size() == 3) {
return std::make_tuple(dims[0], dims[1], dims[2]);
} else if (dims.size() == 4) {
// BHWC format check B == 1
RET_CHECK_EQ(1, dims[0]) << "Expected batch to be 1 for BHWC heatmap";
return std::make_tuple(dims[1], dims[2], dims[3]);
} else {
RET_CHECK(false) << "Invalid shape for segmentation tensor " << dims.size();
}
}
} // namespace
namespace mediapipe {
@ -156,24 +131,37 @@ class TensorsToSegmentationCalculator : public CalculatorBase {
private:
absl::Status LoadOptions(CalculatorContext* cc);
absl::Status InitGpu(CalculatorContext* cc);
absl::Status ProcessGpu(CalculatorContext* cc);
absl::Status ProcessCpu(CalculatorContext* cc);
absl::Status ProcessGpu(CalculatorContext* cc,
const std::vector<Tensor>& input_tensors,
std::tuple<int, int, int> hwc, int output_width,
int output_height);
void GlRender();
bool DoesGpuTextureStartAtBottom() {
return options_.gpu_origin() != mediapipe::GpuOrigin_Mode_TOP_LEFT;
}
absl::Status InitConverterIfNecessary() {
#if !MEDIAPIPE_DISABLE_OPENCV
template <class T>
absl::Status ApplyActivation(cv::Mat& tensor_mat, cv::Mat* small_mask_mat);
if (!cpu_converter_) {
MP_ASSIGN_OR_RETURN(cpu_converter_, CreateOpenCvConverter(options_));
}
#else
RET_CHECK_FAIL() << "OpenCV processing disabled.";
#endif // !MEDIAPIPE_DISABLE_OPENCV
::mediapipe::TensorsToSegmentationCalculatorOptions options_;
return absl::OkStatus();
}
mediapipe::TensorsToSegmentationCalculatorOptions options_;
std::unique_ptr<TensorsToSegmentationConverter> cpu_converter_;
#if !MEDIAPIPE_DISABLE_GPU
mediapipe::GlCalculatorHelper gpu_helper_;
GLuint upsample_program_;
bool gpu_initialized_ = false;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
int cached_width_ = 0;
int cached_height_ = 0;
std::unique_ptr<tflite::gpu::gl::GlTexture> small_mask_texture_;
std::unique_ptr<GlProgram> mask_program_31_;
#else
GLuint mask_program_20_;
@ -203,7 +191,8 @@ absl::Status TensorsToSegmentationCalculator::GetContract(
if (CanUseGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(
cc, /*request_gpu_as_optional=*/true));
#if MEDIAPIPE_METAL_ENABLED
MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#endif // MEDIAPIPE_METAL_ENABLED
@ -215,12 +204,9 @@ absl::Status TensorsToSegmentationCalculator::GetContract(
absl::Status TensorsToSegmentationCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
bool use_gpu = false;
if (CanUseGpu()) {
#if !MEDIAPIPE_DISABLE_GPU
use_gpu = true;
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#if MEDIAPIPE_METAL_ENABLED
metal_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(metal_helper_);
@ -230,14 +216,6 @@ absl::Status TensorsToSegmentationCalculator::Open(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadOptions(cc));
if (use_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(InitGpu(cc));
#else
RET_CHECK_FAIL() << "GPU processing disabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
}
return absl::OkStatus();
}
@ -264,9 +242,10 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
{
RET_CHECK(!input_tensors.empty());
RET_CHECK(input_tensors[0].element_type() == Tensor::ElementType::kFloat32);
ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
MP_ASSIGN_OR_RETURN(auto hwc,
GetHwcFromDims(input_tensors[0].shape().dims));
int tensor_channels = std::get<2>(hwc);
typedef mediapipe::TensorsToSegmentationCalculatorOptions Options;
using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions;
switch (options_.activation()) {
case Options::NONE:
RET_CHECK_EQ(tensor_channels, 1);
@ -280,18 +259,47 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
}
}
// Get dimensions.
MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
auto [tensor_height, tensor_width, tensor_channels] = hwc;
int output_width = tensor_width, output_height = tensor_height;
if (cc->Inputs().HasTag(kOutputSizeTag)) {
const auto& size =
cc->Inputs().Tag(kOutputSizeTag).Get<std::pair<int, int>>();
output_width = size.first;
output_height = size.second;
}
if (use_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
MP_RETURN_IF_ERROR(ProcessGpu(cc));
return absl::OkStatus();
}));
if (!gpu_initialized_) {
MP_RETURN_IF_ERROR(InitGpu(cc));
gpu_initialized_ = true;
}
#else
RET_CHECK_FAIL() << "GPU processing disabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(
gpu_helper_.RunInGlContext([this, cc, &input_tensors, output_width,
output_height, hwc]() -> absl::Status {
MP_RETURN_IF_ERROR(
ProcessGpu(cc, input_tensors, hwc, output_width, output_height));
return absl::OkStatus();
}));
#else
RET_CHECK_FAIL() << "GPU processing disabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
} else {
#if !MEDIAPIPE_DISABLE_OPENCV
MP_RETURN_IF_ERROR(ProcessCpu(cc));
// Lazily initialize converter.
MP_RETURN_IF_ERROR(InitConverterIfNecessary());
MP_ASSIGN_OR_RETURN(
std::unique_ptr<Image> output_mask,
cpu_converter_->Convert(input_tensors, output_width, output_height));
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(),
cc->InputTimestamp());
#else
RET_CHECK_FAIL() << "OpenCV processing disabled.";
#endif // !MEDIAPIPE_DISABLE_OPENCV
@ -302,11 +310,16 @@ absl::Status TensorsToSegmentationCalculator::Process(CalculatorContext* cc) {
absl::Status TensorsToSegmentationCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
if (!gpu_initialized_) {
return absl::OkStatus();
}
gpu_helper_.RunInGlContext([this] {
if (upsample_program_) glDeleteProgram(upsample_program_);
upsample_program_ = 0;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
mask_program_31_.reset();
small_mask_texture_.reset();
#else
if (mask_program_20_) glDeleteProgram(mask_program_20_);
mask_program_20_ = 0;
@ -320,149 +333,35 @@ absl::Status TensorsToSegmentationCalculator::Close(CalculatorContext* cc) {
return absl::OkStatus();
}
absl::Status TensorsToSegmentationCalculator::ProcessCpu(
CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_OPENCV
// Get input streams, and dimensions.
const auto& input_tensors =
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
auto [tensor_height, tensor_width, tensor_channels] = hwc;
int output_width = tensor_width, output_height = tensor_height;
if (cc->Inputs().HasTag(kOutputSizeTag)) {
const auto& size =
cc->Inputs().Tag(kOutputSizeTag).Get<std::pair<int, int>>();
output_width = size.first;
output_height = size.second;
}
// Create initial working mask.
cv::Mat small_mask_mat(cv::Size(tensor_width, tensor_height), CV_32FC1);
// Wrap input tensor.
auto raw_input_tensor = &input_tensors[0];
auto raw_input_view = raw_input_tensor->GetCpuReadView();
const float* raw_input_data = raw_input_view.buffer<float>();
cv::Mat tensor_mat(cv::Size(tensor_width, tensor_height),
CV_MAKETYPE(CV_32F, tensor_channels),
const_cast<float*>(raw_input_data));
// Process mask tensor and apply activation function.
if (tensor_channels == 2) {
MP_RETURN_IF_ERROR(ApplyActivation<cv::Vec2f>(tensor_mat, &small_mask_mat));
} else if (tensor_channels == 1) {
RET_CHECK(mediapipe::TensorsToSegmentationCalculatorOptions::SOFTMAX !=
options_.activation()); // Requires 2 channels.
if (mediapipe::TensorsToSegmentationCalculatorOptions::NONE ==
options_.activation()) // Pass-through optimization.
tensor_mat.copyTo(small_mask_mat);
else
MP_RETURN_IF_ERROR(ApplyActivation<float>(tensor_mat, &small_mask_mat));
} else {
RET_CHECK_FAIL() << "Unsupported number of tensor channels "
<< tensor_channels;
}
// Send out image as CPU packet.
std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
ImageFormat::VEC32F1, output_width, output_height);
std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame);
auto output_mat = formats::MatView(output_mask.get());
// Upsample small mask into output.
cv::resize(small_mask_mat, *output_mat,
cv::Size(output_width, output_height));
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());
#endif // !MEDIAPIPE_DISABLE_OPENCV
return absl::OkStatus();
}
#if !MEDIAPIPE_DISABLE_OPENCV
template <class T>
absl::Status TensorsToSegmentationCalculator::ApplyActivation(
cv::Mat& tensor_mat, cv::Mat* small_mask_mat) {
// Configure activation function.
const int output_layer_index = options_.output_layer_index();
typedef mediapipe::TensorsToSegmentationCalculatorOptions Options;
const auto activation_fn = [&](const cv::Vec2f& mask_value) {
float new_mask_value = 0;
// TODO consider moving switch out of the loop,
// and also avoid float/Vec2f casting.
switch (options_.activation()) {
case Options::NONE: {
new_mask_value = mask_value[0];
break;
}
case Options::SIGMOID: {
const float pixel0 = mask_value[0];
new_mask_value = 1.0 / (std::exp(-pixel0) + 1.0);
break;
}
case Options::SOFTMAX: {
const float pixel0 = mask_value[0];
const float pixel1 = mask_value[1];
const float max_pixel = std::max(pixel0, pixel1);
const float min_pixel = std::min(pixel0, pixel1);
const float softmax_denom =
/*exp(max_pixel - max_pixel)=*/1.0f +
std::exp(min_pixel - max_pixel);
new_mask_value = std::exp(mask_value[output_layer_index] - max_pixel) /
softmax_denom;
break;
}
}
return new_mask_value;
};
// Process mask tensor.
for (int i = 0; i < tensor_mat.rows; ++i) {
for (int j = 0; j < tensor_mat.cols; ++j) {
const T& input_pix = tensor_mat.at<T>(i, j);
const float mask_value = activation_fn(input_pix);
small_mask_mat->at<float>(i, j) = mask_value;
}
}
return absl::OkStatus();
}
#endif // !MEDIAPIPE_DISABLE_OPENCV
// Steps:
// 1. receive tensor
// 2. process segmentation tensor into small mask
// 3. upsample small mask into output mask to be same size as input image
absl::Status TensorsToSegmentationCalculator::ProcessGpu(
CalculatorContext* cc) {
CalculatorContext* cc, const std::vector<Tensor>& input_tensors,
std::tuple<int, int, int> hwc, int output_width, int output_height) {
#if !MEDIAPIPE_DISABLE_GPU
// Get input streams, and dimensions.
const auto& input_tensors =
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
auto [tensor_height, tensor_width, tensor_channels] = hwc;
int output_width = tensor_width, output_height = tensor_height;
if (cc->Inputs().HasTag(kOutputSizeTag)) {
const auto& size =
cc->Inputs().Tag(kOutputSizeTag).Get<std::pair<int, int>>();
output_width = size.first;
output_height = size.second;
}
// Create initial working mask texture.
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
tflite::gpu::gl::GlTexture small_mask_texture;
#else
#if !(MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31)
mediapipe::GlTexture small_mask_texture;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
// Run shader, process mask tensor.
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
{
MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
tflite::gpu::DataType::UINT8, // GL_RGBA8
{tensor_width, tensor_height}, &small_mask_texture));
// Only recreate if the size has changed. See b/297809673 for more details.
if (tensor_width != cached_width_ || tensor_height != cached_height_) {
MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture(
tflite::gpu::DataType::UINT8, // GL_RGBA8
{tensor_width, tensor_height}, small_mask_texture_.get()));
cached_width_ = tensor_width;
cached_height_ = tensor_height;
}
const int output_index = 0;
glBindImageTexture(output_index, small_mask_texture.id(), 0, GL_FALSE, 0,
glBindImageTexture(output_index, small_mask_texture_->id(), 0, GL_FALSE, 0,
GL_WRITE_ONLY, GL_RGBA8);
auto read_view = input_tensors[0].GetOpenGlBufferReadView();
@ -547,7 +446,7 @@ absl::Status TensorsToSegmentationCalculator::ProcessGpu(
gpu_helper_.BindFramebuffer(output_texture);
glActiveTexture(GL_TEXTURE1);
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
glBindTexture(GL_TEXTURE_2D, small_mask_texture.id());
glBindTexture(GL_TEXTURE_2D, small_mask_texture_->id());
#else
glBindTexture(GL_TEXTURE_2D, small_mask_texture.name());
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
@ -620,13 +519,14 @@ void TensorsToSegmentationCalculator::GlRender() {
absl::Status TensorsToSegmentationCalculator::LoadOptions(
CalculatorContext* cc) {
// Get calculator options specified in the graph.
options_ = cc->Options<::mediapipe::TensorsToSegmentationCalculatorOptions>();
options_ = cc->Options<mediapipe::TensorsToSegmentationCalculatorOptions>();
return absl::OkStatus();
}
absl::Status TensorsToSegmentationCalculator::InitGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> absl::Status {
// A shader to process a segmentation tensor into an output mask.
// Currently uses 4 channels for output, and sets R+A channels as mask value.
@ -813,7 +713,7 @@ void main() {
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
// Shader defines.
typedef mediapipe::TensorsToSegmentationCalculatorOptions Options;
using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions;
const std::string output_layer_index =
"\n#define OUTPUT_LAYER_INDEX int(" +
std::to_string(options_.output_layer_index()) + ")";
@ -854,6 +754,7 @@ void main() {
mask_program_31_ = absl::make_unique<GlProgram>();
MP_RETURN_IF_ERROR(GlProgram::CreateWithShader(shader_without_previous,
mask_program_31_.get()));
small_mask_texture_ = absl::make_unique<tflite::gpu::gl::GlTexture>();
#elif MEDIAPIPE_METAL_ENABLED
id<MTLDevice> device = metal_helper_.mtlDevice;
NSString* library_source =
@ -890,6 +791,8 @@ void main() {
return absl::OkStatus();
}));
gpu_initialized_ = true;
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();

View File

@ -0,0 +1,208 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/timestamp.h"
namespace mediapipe {
namespace {
using ::testing::SizeIs;
using ::testing::TestWithParam;
using Options = mediapipe::TensorsToSegmentationCalculatorOptions;
namespace test_utils = ::mediapipe::tensors_to_segmentation_utils;
using TensorsToSegmentationCalculatorTest =
TestWithParam<test_utils::FormattingTestCase>;
// Runs the TensorsToSegmentationCalculator CPU path for each parameterized
// case: builds a graph from the test config, feeds one float tensor plus an
// output size, and compares the produced mask against the expected values
// within `max_abs_diff`.
TEST_P(TensorsToSegmentationCalculatorTest, ParameterizedTests) {
const auto& [test_name, inputs, expected_outputs, activation, rows, cols,
rows_new, cols_new, channels, max_abs_diff] = GetParam();
// CPU-only graph; a sink collects the output mask packets.
auto graph_config =
test_utils::CreateGraphConfigForTest(/*test_gpu=*/false, activation);
std::vector<Packet> output_packets;
tool::AddVectorSink("image_as_mask", &graph_config, &output_packets);
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
// Input tensor shaped BHWC with batch 1.
auto tensors = std::make_unique<std::vector<Tensor>>();
tensors->emplace_back(Tensor::ElementType::kFloat32,
Tensor::Shape{1, rows, cols, channels});
// We scope the tensor's GetCpuWriteView() call so that its lock is released
// before we pass it into the graph.
{
auto view = tensors->back().GetCpuWriteView();
float* tensor_buffer = view.buffer<float>();
for (int i = 0; i < inputs.size(); ++i) {
tensor_buffer[i] = inputs[i];
}
MP_ASSERT_OK(graph.AddPacketToInputStream(
"tensors", mediapipe::Adopt(tensors.release()).At(Timestamp(0))));
}
// The output size is defined as pair(new_width, new_height).
MP_ASSERT_OK(graph.AddPacketToInputStream(
"size", mediapipe::Adopt(new std::pair<int, int>(cols_new, rows_new))
.At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
// Exactly one CPU-resident, single-channel mask of the resized dimensions
// is expected.
ASSERT_THAT(output_packets, SizeIs(1));
const Image& image_as_mask = output_packets[0].Get<Image>();
EXPECT_FALSE(image_as_mask.UsesGpu());
std::shared_ptr<cv::Mat> result_mat = formats::MatView(&image_as_mask);
EXPECT_EQ(result_mat->rows, rows_new);
EXPECT_EQ(result_mat->cols, cols_new);
EXPECT_EQ(result_mat->channels(), 1);
// Compare the real result with the expected result.
cv::Mat expected_result =
cv::Mat(rows_new, cols_new, CV_32FC1,
const_cast<float*>(expected_outputs.data()));
cv::Mat diff;
cv::absdiff(*result_mat, expected_result, diff);
double max_val;
cv::minMaxLoc(diff, nullptr, &max_val);
// The max allowable diff between output and expected output varies between
// tests.
EXPECT_LE(max_val, max_abs_diff);
// Shut the graph down cleanly before the test ends.
MP_ASSERT_OK(graph.CloseInputStream("tensors"));
MP_ASSERT_OK(graph.CloseInputStream("size"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
// Test matrix: each case pairs a flattened row-major input tensor of shape
// (rows, cols, channels) with the expected mask of shape (rows_new, cols_new)
// for a given activation. The lambda at the end names each test instance
// after its test_name field.
INSTANTIATE_TEST_SUITE_P(
    TensorsToSegmentationCalculatorTests, TensorsToSegmentationCalculatorTest,
    testing::ValuesIn<test_utils::FormattingTestCase>({
        // Identity pass-through: output equals input.
        {.test_name = "NoActivationAndNoOutputResize",
         .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
                    12.0, 13.0, 14.0, 15.0, 16.0},
         .expected_outputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
                              11.0, 12.0, 13.0, 14.0, 15.0, 16.0},
         .activation = Options::NONE,
         .rows = 4,
         .cols = 4,
         .rows_new = 4,
         .cols_new = 4,
         .channels = 1,
         .max_abs_diff = 1e-7},
        // Bilinear upsample from 4x4 to 5x6, no activation.
        {.test_name = "OutputResizeOnly",
         .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
                    12.0, 13.0, 14.0, 15.0, 16.0},
         .expected_outputs = {1, 1.5, 2.166667, 2.833333, 3.5, 4,
                              3.8, 4.3, 4.966667, 5.633333, 6.3, 6.8,
                              7, 7.5, 8.166667, 8.833333, 9.5, 10,
                              10.2, 10.7, 11.366667, 12.033333, 12.7, 13.2,
                              13, 13.5, 14.166667, 14.833333, 15.5, 16},
         .activation = Options::NONE,
         .rows = 4,
         .cols = 4,
         .rows_new = 5,
         .cols_new = 6,
         .channels = 1,
         .max_abs_diff = 1e-6},
        // Sigmoid applied element-wise, geometry unchanged.
        {.test_name = "SigmoidActivationWithNoOutputResize",
         .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
                    12.0, 13.0, 14.0, 15.0, 16.0},
         .expected_outputs = {0.731059, 0.880797, 0.952574, 0.982014, 0.993307,
                              0.997527, 0.999089, 0.999665, 0.999877, 0.999955,
                              0.999983, 0.999994, 0.999998, 0.999999, 1.0, 1.0},
         .activation = Options::SIGMOID,
         .rows = 4,
         .cols = 4,
         .rows_new = 4,
         .cols_new = 4,
         .channels = 1,
         .max_abs_diff = 1e-6},
        // Sigmoid activation followed by upsample to 5x6.
        {.test_name = "SigmoidActivationWithOutputResize",
         .inputs = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
                    12.0, 13.0, 14.0, 15.0, 16.0},
         .expected_outputs = {0.731059, 0.805928, 0.89276, 0.940611, 0.967294,
                              0.982014, 0.914633, 0.93857, 0.966279, 0.981363,
                              0.989752, 0.994369, 0.996592, 0.997666, 0.998873,
                              0.999404, 0.999683, 0.999829, 0.999913, 0.99994,
                              0.999971, 0.999985, 0.999992, 0.999996, 0.999998,
                              0.999998, 0.999999, 1.0, 1.0, 1.0},
         .activation = Options::SIGMOID,
         .rows = 4,
         .cols = 4,
         .rows_new = 5,
         .cols_new = 6,
         .channels = 1,
         .max_abs_diff = 1e-6},
        // Two-channel softmax; inputs interleave the two channels per pixel.
        {.test_name = "SoftmaxActivationWithNoOutputResize",
         .inputs = {1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 6.0, 1.5,
                    7.0, 10.0, 11.0, 4.0, 12.0, 15.0, 16.0, 18.5,
                    19.0, 20.0, 22.0, 23.0, 24.5, 23.4, 25.6, 28.3,
                    29.2, 30.0, 24.6, 29.2, 30.0, 24.9, 31.2, 30.3},
         .expected_outputs = {0.731059, 0.119203, 0.880797, 0.0109869, 0.952574,
                              0.000911051, 0.952574, 0.924142, 0.731059,
                              0.731059, 0.24974, 0.937027, 0.689974, 0.990048,
                              0.0060598, 0.28905},
         .activation = Options::SOFTMAX,
         .rows = 4,
         .cols = 4,
         .rows_new = 4,
         .cols_new = 4,
         .channels = 2,
         .max_abs_diff = 1e-6},
        // Two-channel softmax followed by upsample to 5x6.
        {.test_name = "SoftmaxActivationWithOutputResize",
         .inputs = {1.0, 2.0, 4.0, 2.0, 3.0, 5.0, 6.0, 1.5,
                    7.0, 10.0, 11.0, 4.0, 12.0, 15.0, 16.0, 18.5,
                    19.0, 20.0, 22.0, 23.0, 24.5, 23.4, 25.6, 28.3,
                    29.2, 30.0, 24.6, 29.2, 30.0, 24.9, 31.2, 30.3},
         .expected_outputs = {0.731059, 0.425131, 0.246135, 0.753865, 0.445892,
                              0.0109869, 0.886119, 0.461259, 0.185506, 0.781934,
                              0.790618, 0.650195, 0.841816, 0.603901, 0.40518,
                              0.561962, 0.765871, 0.930584, 0.718733, 0.763744,
                              0.703402, 0.281989, 0.459635, 0.742634, 0.689974,
                              0.840011, 0.82605, 0.170058, 0.147555, 0.28905},
         .activation = Options::SOFTMAX,
         .rows = 4,
         .cols = 4,
         .rows_new = 5,
         .cols_new = 6,
         .channels = 2,
         .max_abs_diff = 1e-6},
    }),
    [](const testing::TestParamInfo<
        TensorsToSegmentationCalculatorTest::ParamType>& info) {
      return info.param.test_name;
    });
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,111 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h"
#include <string>
#include <vector>
#include "absl/log/absl_log.h"
#include "absl/strings/substitute.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"
namespace mediapipe {
namespace tensors_to_segmentation_utils {
// Returns the proto enum name for `activation`. An out-of-range value
// (impossible for a well-formed proto enum) aborts via ABSL_LOG(FATAL).
std::string ActivationTypeToString(
    const TensorsToSegmentationCalculatorOptions::Activation& activation) {
  using Options = TensorsToSegmentationCalculatorOptions;
  switch (activation) {
    case Options::NONE:
      return "NONE";
    case Options::SIGMOID:
      return "SIGMOID";
    case Options::SOFTMAX:
      return "SOFTMAX";
  }
  ABSL_LOG(FATAL) << "Unknown activation type: " << activation;
  return "UNKNOWN";  // Unreachable; satisfies compilers requiring a return.
}
// Truncates each float in `array` to an unsigned char via static_cast
// (no rounding or clamping) and returns the converted vector.
std::vector<unsigned char> ArrayFloatToUnsignedChar(
    const std::vector<float>& array) {
  std::vector<unsigned char> result;
  result.reserve(array.size());
  // Range-for avoids the signed/unsigned index comparison of the manual loop.
  for (const float value : array) {
    result.push_back(static_cast<unsigned char>(value));
  }
  return result;
}
// Expands each value v into the RGBA-style quadruple (v, 0, 0, v): the value
// goes into the red and alpha channels, green and blue are zero.
std::vector<float> MakeRedAlphaMatrix(const std::vector<float>& values) {
  std::vector<float> rgba;
  rgba.reserve(values.size() * 4);
  for (const float v : values) {
    rgba.insert(rgba.end(), {v, 0.0f, 0.0f, v});
  }
  return rgba;
}
// For GPU tests, the input tensor needs to be moved to GPU, using
// TensorViewRequestor. After calculation, the output needs to be moved back
// to CPU, using ToImageCalculator. The output is an ImageFrame.
//
// Substitution slots in the template below:
//   $0 - GPU pre-processing node (empty for CPU),
//   $1 - suffix on the TENSORS input stream ("_gpu" for GPU),
//   $2 - suffix on the MASK output stream ("_gpu" for GPU),
//   $3 - activation enum name (see ActivationTypeToString),
//   $4 - GPU post-processing node (empty for CPU).
mediapipe::CalculatorGraphConfig CreateGraphConfigForTest(
    bool test_gpu,
    const TensorsToSegmentationCalculatorOptions::Activation& activation) {
  // Moves the input tensors onto the GPU before the calculator under test.
  std::string pre_process = R"pb(
    node {
      calculator: "mediapipe.aimatter.TensorViewRequestor"
      input_stream: "TENSORS:tensors"
      output_stream: "TENSORS:tensors_gpu"
      options {
        [mediapipe.aimatter.TensorViewRequestorOptions.ext] { gpu {} }
      }
    }
  )pb";
  // Copies the GPU mask back to the CPU after the calculator under test.
  std::string post_process = R"pb(
    node {
      calculator: "FromImageCalculator"
      input_stream: "IMAGE:image_as_mask_gpu"
      output_stream: "IMAGE_CPU:image_as_mask"
    }
  )pb";
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
      absl::Substitute(
          R"pb(
            input_stream: "tensors"
            input_stream: "size" $0
            node {
              calculator: "TensorsToSegmentationCalculator"
              input_stream: "TENSORS:tensors$1"
              input_stream: "OUTPUT_SIZE:size"
              output_stream: "MASK:image_as_mask$2"
              options: {
                [mediapipe.TensorsToSegmentationCalculatorOptions.ext] {
                  activation: $3
                  gpu_origin: TOP_LEFT
                }
              }
            } $4
          )pb",
          test_gpu ? pre_process : "", test_gpu ? "_gpu" : "",
          test_gpu ? "_gpu" : "", ActivationTypeToString(activation),
          test_gpu ? post_process : ""));
}
} // namespace tensors_to_segmentation_utils
} // namespace mediapipe

View File

@ -0,0 +1,57 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CALCULATOR_TEST_UTILS_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CALCULATOR_TEST_UTILS_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"
namespace mediapipe {
namespace tensors_to_segmentation_utils {
std::string ActivationTypeToString(
const mediapipe::TensorsToSegmentationCalculatorOptions::Activation&
activation);
std::vector<unsigned char> ArrayFloatToUnsignedChar(
const std::vector<float>& array);
std::vector<float> MakeRedAlphaMatrix(const std::vector<float>& values);
mediapipe::CalculatorGraphConfig CreateGraphConfigForTest(
bool test_gpu,
const mediapipe::TensorsToSegmentationCalculatorOptions::Activation&
activation);
// Bundle of inputs and expectations for one parameterized run of
// TensorsToSegmentationCalculator.
struct FormattingTestCase {
  std::string test_name;  // Name reported by the parameterized test suite.
  std::vector<float> inputs;  // Flattened row-major input tensor contents.
  std::vector<float> expected_outputs;  // Flattened row-major expected mask.
  mediapipe::TensorsToSegmentationCalculatorOptions::Activation activation;
  int rows = 1;       // Input tensor height.
  int cols = 1;       // Input tensor width.
  int rows_new = 1;   // Requested output mask height.
  int cols_new = 1;   // Requested output mask width.
  int channels = 1;   // Input tensor channel count.
  double max_abs_diff = 1e-7;  // Per-pixel tolerance when comparing masks.
};
} // namespace tensors_to_segmentation_utils
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CALCULATOR_TEST_UTILS_H_

View File

@ -0,0 +1,50 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator_test_utils.h"
#include <vector>
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/framework/port/gtest.h"
namespace mediapipe::tensors_to_segmentation_utils {
namespace {
using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions;

// Each activation enum value maps to its proto enum name.
TEST(TensorsToSegmentationCalculatorTestUtilsTest,
     ActivationTypeToStringWorksCorrectly) {
  EXPECT_EQ(ActivationTypeToString(Options::NONE), "NONE");
  EXPECT_EQ(ActivationTypeToString(Options::SIGMOID), "SIGMOID");
  EXPECT_EQ(ActivationTypeToString(Options::SOFTMAX), "SOFTMAX");
}

// Floats are truncated (static_cast) to unsigned char, element by element.
TEST(TensorsToSegmentationCalculatorTestUtilsTest,
     ArrayFloatToUnsignedCharWorksCorrectly) {
  std::vector<float> input = {1.0, 2.0, 3.0};
  std::vector<unsigned char> expected = {1, 2, 3};
  EXPECT_EQ(ArrayFloatToUnsignedChar(input), expected);
}

// Each input value v expands to the quadruple (v, 0, 0, v).
TEST(TensorsToSegmentationCalculatorTestUtilsTest,
     MakeRedAlphaMatrixWorksCorrectly) {
  std::vector<float> input = {1.0, 2.0, 3.0};
  std::vector<float> expected = {1.0, 0.0, 0.0, 1.0, 2.0, 0.0,
                                 0.0, 2.0, 3.0, 0.0, 0.0, 3.0};
  EXPECT_EQ(MakeRedAlphaMatrix(input), expected);
}
} // namespace
} // namespace mediapipe::tensors_to_segmentation_utils

View File

@ -0,0 +1,43 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_H_
#include <memory>
#include <vector>
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/tensor.h"
namespace mediapipe {
// Abstract interface for converting segmentation tensors into an Image mask.
// Concrete backends (e.g. an OpenCV-based CPU converter) implement Convert().
class TensorsToSegmentationConverter {
 public:
  virtual ~TensorsToSegmentationConverter() = default;

  // Converts tensors to image mask.
  // Returns a unique pointer containing the converted image.
  // @input_tensors contains the tensors needed to be processed.
  // @output_width/height describes output dimensions to reshape the output mask
  // into.
  virtual absl::StatusOr<std::unique_ptr<Image>> Convert(
      const std::vector<Tensor>& input_tensors, int output_width,
      int output_height) = 0;
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_H_

View File

@ -0,0 +1,157 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter_opencv.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
namespace mediapipe {
namespace {
// CPU implementation of TensorsToSegmentationConverter that uses OpenCV to
// apply the configured activation and resize the mask.
class OpenCvProcessor : public TensorsToSegmentationConverter {
 public:
  // Stores a copy of the calculator options; call before Convert().
  absl::Status Init(const TensorsToSegmentationCalculatorOptions& options) {
    options_ = options;
    return absl::OkStatus();
  }

  absl::StatusOr<std::unique_ptr<Image>> Convert(
      const std::vector<Tensor>& input_tensors, int output_width,
      int output_height) override;

 private:
  // Applies the configured activation element-wise to `tensor_mat`, writing
  // the single-channel float result into `small_mask_mat`. T is the per-pixel
  // type (float for 1 channel, cv::Vec2f for 2).
  template <class T>
  absl::Status ApplyActivation(cv::Mat& tensor_mat, cv::Mat* small_mask_mat);

  TensorsToSegmentationCalculatorOptions options_;
};
// Converts the first tensor in `input_tensors` into a single-channel float
// Image of size (output_width, output_height): wraps the tensor buffer in a
// cv::Mat, applies the configured activation, and bilinearly resizes the
// result. Returns an error for an empty tensor vector or an unsupported
// channel count.
absl::StatusOr<std::unique_ptr<Image>> OpenCvProcessor::Convert(
    const std::vector<Tensor>& input_tensors, int output_width,
    int output_height) {
  // Guard before dereferencing input_tensors[0] below.
  RET_CHECK(!input_tensors.empty()) << "Expected at least one input tensor.";
  MP_ASSIGN_OR_RETURN(auto hwc, GetHwcFromDims(input_tensors[0].shape().dims));
  auto [tensor_height, tensor_width, tensor_channels] = hwc;
  // Create initial working mask.
  cv::Mat small_mask_mat(cv::Size(tensor_width, tensor_height), CV_32FC1);
  // Wrap input tensor. The const_cast is safe because tensor_mat is only
  // read from; the read view keeps the buffer alive for this scope.
  auto raw_input_tensor = &input_tensors[0];
  auto raw_input_view = raw_input_tensor->GetCpuReadView();
  const float* raw_input_data = raw_input_view.buffer<float>();
  cv::Mat tensor_mat(cv::Size(tensor_width, tensor_height),
                     CV_MAKETYPE(CV_32F, tensor_channels),
                     const_cast<float*>(raw_input_data));
  // Process mask tensor and apply activation function.
  if (tensor_channels == 2) {
    MP_RETURN_IF_ERROR(ApplyActivation<cv::Vec2f>(tensor_mat, &small_mask_mat));
  } else if (tensor_channels == 1) {
    RET_CHECK(mediapipe::TensorsToSegmentationCalculatorOptions::SOFTMAX !=
              options_.activation());  // Requires 2 channels.
    if (mediapipe::TensorsToSegmentationCalculatorOptions::NONE ==
        options_.activation())  // Pass-through optimization.
      tensor_mat.copyTo(small_mask_mat);
    else
      MP_RETURN_IF_ERROR(ApplyActivation<float>(tensor_mat, &small_mask_mat));
  } else {
    RET_CHECK_FAIL() << "Unsupported number of tensor channels "
                     << tensor_channels;
  }
  // Send out image as CPU packet.
  std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
      ImageFormat::VEC32F1, output_width, output_height);
  auto output_mask = std::make_unique<Image>(mask_frame);
  auto output_mat = formats::MatView(output_mask.get());
  // Upsample small mask into output.
  cv::resize(small_mask_mat, *output_mat,
             cv::Size(output_width, output_height));
  return output_mask;
}
// Applies the configured activation (NONE / SIGMOID / SOFTMAX) to every pixel
// of `tensor_mat`, writing the single-channel float result to
// `small_mask_mat`. T is the pixel type of `tensor_mat`; a float pixel is
// implicitly widened to cv::Vec2f when passed to the activation lambda.
template <class T>
absl::Status OpenCvProcessor::ApplyActivation(cv::Mat& tensor_mat,
                                              cv::Mat* small_mask_mat) {
  // Configure activation function.
  const int output_layer_index = options_.output_layer_index();
  using Options = ::mediapipe::TensorsToSegmentationCalculatorOptions;
  const auto activation_fn = [&](const cv::Vec2f& mask_value) {
    float new_mask_value = 0;
    // TODO consider moving switch out of the loop,
    // and also avoid float/Vec2f casting.
    switch (options_.activation()) {
      case Options::NONE: {
        new_mask_value = mask_value[0];
        break;
      }
      case Options::SIGMOID: {
        // Standard logistic function 1 / (1 + e^-x).
        const float pixel0 = mask_value[0];
        new_mask_value = 1.0 / (std::exp(-pixel0) + 1.0);
        break;
      }
      case Options::SOFTMAX: {
        // Two-class softmax; subtracting the max before exponentiating keeps
        // the computation numerically stable for large inputs.
        const float pixel0 = mask_value[0];
        const float pixel1 = mask_value[1];
        const float max_pixel = std::max(pixel0, pixel1);
        const float min_pixel = std::min(pixel0, pixel1);
        const float softmax_denom =
            /*exp(max_pixel - max_pixel)=*/1.0f +
            std::exp(min_pixel - max_pixel);
        new_mask_value = std::exp(mask_value[output_layer_index] - max_pixel) /
                         softmax_denom;
        break;
      }
    }
    return new_mask_value;
  };
  // Process mask tensor.
  for (int i = 0; i < tensor_mat.rows; ++i) {
    for (int j = 0; j < tensor_mat.cols; ++j) {
      const T& input_pix = tensor_mat.at<T>(i, j);
      const float mask_value = activation_fn(input_pix);
      small_mask_mat->at<float>(i, j) = mask_value;
    }
  }
  return absl::OkStatus();
}
} // namespace
// Builds an OpenCV-backed TensorsToSegmentationConverter initialized with
// `options`, or propagates the initialization error.
absl::StatusOr<std::unique_ptr<TensorsToSegmentationConverter>>
CreateOpenCvConverter(const TensorsToSegmentationCalculatorOptions& options) {
  auto processor = std::make_unique<OpenCvProcessor>();
  MP_RETURN_IF_ERROR(processor->Init(options));
  return processor;
}
} // namespace mediapipe

View File

@ -0,0 +1,31 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_OPENCV_H_
#include <memory>
#include "absl/status/statusor.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
#include "mediapipe/calculators/tensor/tensors_to_segmentation_converter.h"
namespace mediapipe {
// Creates OpenCV tensors-to-segmentation converter.
absl::StatusOr<std::unique_ptr<TensorsToSegmentationConverter>>
CreateOpenCvConverter(
const mediapipe::TensorsToSegmentationCalculatorOptions& options);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_CONVERTER_OPENCV_H_

View File

@ -0,0 +1,52 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
#include <tuple>
#include <vector>
#include "absl/status/statusor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
// Ceiling division: the number of groups of `group_size` elements needed to
// cover `size` elements.
int NumGroups(int size, int group_size) {
  const int rounded_up = size + group_size - 1;
  return rounded_up / group_size;
}
// Returns whether GPU processing is permitted in this build configuration;
// compiles to a constant false when GPU support is disabled and Metal is not
// enabled.
bool CanUseGpu() {
#if !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
  // TODO: Configure GPU usage policy in individual calculators.
  constexpr bool kAllowGpuProcessing = true;
  return kAllowGpuProcessing;
#else
  return false;
#endif  // !MEDIAPIPE_DISABLE_GPU || MEDIAPIPE_METAL_ENABLED
}
// Extracts (height, width, channels) from a 3-D HWC or 4-D BHWC dims vector.
// A 4-D shape must have batch == 1; any other rank is rejected.
absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
    const std::vector<int>& dims) {
  switch (dims.size()) {
    case 3:
      return std::make_tuple(dims[0], dims[1], dims[2]);
    case 4:
      // BHWC format check B == 1
      RET_CHECK_EQ(dims[0], 1) << "Expected batch to be 1 for BHWC heatmap";
      return std::make_tuple(dims[1], dims[2], dims[3]);
    default:
      RET_CHECK(false) << "Invalid shape for segmentation tensor "
                       << dims.size();
  }
}
} // namespace mediapipe

View File

@ -0,0 +1,34 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_UTILS_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_UTILS_H_
#include <tuple>
#include <vector>
#include "absl/status/statusor.h"
namespace mediapipe {
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size); // NOLINT
bool CanUseGpu();
absl::StatusOr<std::tuple<int, int, int>> GetHwcFromDims(
const std::vector<int>& dims);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_TENSORS_TO_SEGMENTATION_UTILS_H_

View File

@ -0,0 +1,63 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensors_to_segmentation_utils.h"
#include <tuple>
#include <vector>
#include "absl/status/statusor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
using ::testing::HasSubstr;

// Ceiling division: 13/4 rounds up to 4; 4/13 rounds up to 1.
TEST(TensorsToSegmentationUtilsTest, NumGroupsWorksProperly) {
  EXPECT_EQ(NumGroups(13, 4), 4);
  EXPECT_EQ(NumGroups(4, 13), 1);
}

// 3-D dims pass through as (H, W, C); 4-D dims drop a leading batch of 1.
TEST(TensorsToSegmentationUtilsTest, GetHwcFromDimsWorksProperly) {
  std::vector<int> dims_3 = {2, 3, 4};
  absl::StatusOr<std::tuple<int, int, int>> result_1 = GetHwcFromDims(dims_3);
  MP_ASSERT_OK(result_1);
  EXPECT_EQ(result_1.value(), (std::make_tuple(2, 3, 4)));
  std::vector<int> dims_4 = {1, 3, 4, 5};
  absl::StatusOr<std::tuple<int, int, int>> result_2 = GetHwcFromDims(dims_4);
  MP_ASSERT_OK(result_2);
  EXPECT_EQ(result_2.value(), (std::make_tuple(3, 4, 5)));
}

// A 4-D shape with batch != 1 is rejected with a descriptive error.
TEST(TensorsToSegmentationUtilsTest, GetHwcFromDimsBatchCheckFail) {
  std::vector<int> dims_4 = {2, 3, 4, 5};
  absl::StatusOr<std::tuple<int, int, int>> result = GetHwcFromDims(dims_4);
  EXPECT_FALSE(result.ok());
  EXPECT_THAT(result.status().message(),
              HasSubstr("Expected batch to be 1 for BHWC heatmap"));
}

// Ranks other than 3 or 4 are rejected with a descriptive error.
TEST(TensorsToSegmentationUtilsTest, GetHwcFromDimsInvalidShape) {
  std::vector<int> dims_5 = {1, 2, 3, 4, 5};
  absl::StatusOr<std::tuple<int, int, int>> result = GetHwcFromDims(dims_5);
  EXPECT_FALSE(result.ok());
  EXPECT_THAT(result.status().message(),
              HasSubstr("Invalid shape for segmentation tensor"));
}
} // namespace
} // namespace mediapipe

View File

@ -61,9 +61,10 @@ RunUniversalSentenceEncoderPreprocessorCalculator(absl::string_view text) {
std::string model_buffer =
tasks::core::LoadBinaryContent(kTestModelPath.data());
ASSIGN_OR_RETURN(std::unique_ptr<ModelMetadataExtractor> metadata_extractor,
ModelMetadataExtractor::CreateFromModelBuffer(
model_buffer.data(), model_buffer.size()));
MP_ASSIGN_OR_RETURN(
std::unique_ptr<ModelMetadataExtractor> metadata_extractor,
ModelMetadataExtractor::CreateFromModelBuffer(model_buffer.data(),
model_buffer.size()));
// Run the graph.
CalculatorGraph graph;
MP_RETURN_IF_ERROR(graph.Initialize(

View File

@ -151,7 +151,7 @@ class ObjectDetectionTensorsToDetectionsCalculator : public CalculatorBase {
tf::Tensor input_num_detections_tensor =
tf::Tensor(tf::DT_FLOAT, tf::TensorShape({0}));
if (cc->Inputs().HasTag(kClasses)) {
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
input_num_detections_tensor,
MaybeSqueezeDims(kNumDetections,
cc->Inputs().Tag(kNumDetections).Get<tf::Tensor>()));
@ -160,12 +160,12 @@ class ObjectDetectionTensorsToDetectionsCalculator : public CalculatorBase {
RET_CHECK_EQ(input_num_detections_tensor.dtype(), tf::DT_FLOAT);
}
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
auto input_boxes_tensor,
MaybeSqueezeDims(kBoxes, cc->Inputs().Tag(kBoxes).Get<tf::Tensor>()));
RET_CHECK_EQ(input_boxes_tensor.dtype(), tf::DT_FLOAT);
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
auto input_scores_tensor,
MaybeSqueezeDims(kScores, cc->Inputs().Tag(kScores).Get<tf::Tensor>()));
RET_CHECK_EQ(input_scores_tensor.dtype(), tf::DT_FLOAT);
@ -173,7 +173,7 @@ class ObjectDetectionTensorsToDetectionsCalculator : public CalculatorBase {
tf::Tensor input_classes_tensor =
tf::Tensor(tf::DT_FLOAT, tf::TensorShape({0}));
if (cc->Inputs().HasTag(kClasses)) {
ASSIGN_OR_RETURN(
MP_ASSIGN_OR_RETURN(
input_classes_tensor,
MaybeSqueezeDims(kClasses,
cc->Inputs().Tag(kClasses).Get<tf::Tensor>()));

View File

@ -75,10 +75,11 @@ namespace mpms = mediapipe::mediasequence;
// vector<pair<float, float>>>,
// * "CLIP_MEDIA_ID", which stores the clip's media ID as a string.
// * "CLIP_LABEL_${NAME}" which stores sparse feature labels, ID and scores in
// mediapipe::Detection.
// mediapipe::Detection. In the input Detection, the score field is required,
// and label and label_id are optional but at least one of them should be set.
// "IMAGE_${NAME}", "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store
// prefixed versions of each stream, which allows for multiple image streams to
// be included. However, the default names are suppored by more tools.
// be included. However, the default names are supported by more tools.
//
// Example config:
// node {
@ -514,24 +515,37 @@ class PackMediaSequenceCalculator : public CalculatorBase {
const std::string& key = tag.substr(
sizeof(kClipLabelPrefixTag) / sizeof(*kClipLabelPrefixTag) - 1);
const Detection& detection = cc->Inputs().Tag(tag).Get<Detection>();
if (detection.label().size() != detection.score().size()) {
return absl::InvalidArgumentError(
"Different size of detection.label and detection.score");
if (detection.score().empty()) {
continue;
}
// Allow empty label_ids, but if label_ids is not empty, it should have
// the same size as the label and score fields.
if (!detection.label_id().empty()) {
if (detection.label_id().size() != detection.label().size()) {
if (detection.label().empty() && detection.label_id().empty()) {
return absl::InvalidArgumentError(
"detection.label and detection.label_id can't be both empty");
}
// Allow empty label (for indexed feature inputs), but if label is not
// empty, it should have the same size as the score field.
if (!detection.label().empty()) {
if (detection.label().size() != detection.score().size()) {
return absl::InvalidArgumentError(
"Different size of detection.label_id and detection.label");
"Different size of detection.label and detection.score");
}
}
for (int i = 0; i < detection.label().size(); ++i) {
// Allow empty label_ids, but if label_ids is not empty, it should have
// the same size as the score field.
if (!detection.label_id().empty()) {
if (detection.label_id().size() != detection.score().size()) {
return absl::InvalidArgumentError(
"Different size of detection.label_id and detection.score");
}
}
for (int i = 0; i < detection.score().size(); ++i) {
if (!detection.label_id().empty()) {
mpms::AddClipLabelIndex(key, detection.label_id(i),
sequence_.get());
}
mpms::AddClipLabelString(key, detection.label(i), sequence_.get());
if (!detection.label().empty()) {
mpms::AddClipLabelString(key, detection.label(i), sequence_.get());
}
mpms::AddClipLabelConfidence(key, detection.score(i),
sequence_.get());
}

View File

@ -75,6 +75,7 @@ constexpr char kImageTag[] = "IMAGE";
constexpr char kClipMediaIdTag[] = "CLIP_MEDIA_ID";
constexpr char kClipLabelTestTag[] = "CLIP_LABEL_TEST";
constexpr char kClipLabelOtherTag[] = "CLIP_LABEL_OTHER";
constexpr char kClipLabelAnotherTag[] = "CLIP_LABEL_ANOTHER";
class PackMediaSequenceCalculatorTest : public ::testing::Test {
protected:
@ -1166,9 +1167,10 @@ TEST_F(PackMediaSequenceCalculatorTest, PacksTwoMaskDetections) {
testing::ElementsAreArray(::std::vector<std::string>({"mask"})));
}
TEST_F(PackMediaSequenceCalculatorTest, PackTwoClipLabels) {
TEST_F(PackMediaSequenceCalculatorTest, PackThreeClipLabels) {
SetUpCalculator(
/*input_streams=*/{"CLIP_LABEL_TEST:test", "CLIP_LABEL_OTHER:test2"},
/*input_streams=*/{"CLIP_LABEL_TEST:test", "CLIP_LABEL_OTHER:test2",
"CLIP_LABEL_ANOTHER:test3"},
/*features=*/{}, /*output_only_if_all_present=*/false,
/*replace_instead_of_append=*/true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
@ -1192,6 +1194,16 @@ TEST_F(PackMediaSequenceCalculatorTest, PackTwoClipLabels) {
runner_->MutableInputs()
->Tag(kClipLabelOtherTag)
.packets.push_back(MakePacket<Detection>(detection_2).At(Timestamp(2)));
// No label for detection_3.
Detection detection_3;
detection_3.add_label_id(3);
detection_3.add_label_id(4);
detection_3.add_score(0.3);
detection_3.add_score(0.4);
runner_->MutableInputs()
->Tag(kClipLabelAnotherTag)
.packets.push_back(MakePacket<Detection>(detection_3).At(Timestamp(3)));
runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
Adopt(input_sequence.release());
@ -1214,6 +1226,86 @@ TEST_F(PackMediaSequenceCalculatorTest, PackTwoClipLabels) {
ASSERT_FALSE(mpms::HasClipLabelIndex("OTHER", output_sequence));
ASSERT_THAT(mpms::GetClipLabelConfidence("OTHER", output_sequence),
testing::ElementsAre(0.3, 0.4));
ASSERT_FALSE(mpms::HasClipLabelString("ANOTHER", output_sequence));
ASSERT_THAT(mpms::GetClipLabelIndex("ANOTHER", output_sequence),
testing::ElementsAre(3, 4));
ASSERT_THAT(mpms::GetClipLabelConfidence("ANOTHER", output_sequence),
testing::ElementsAre(0.3, 0.4));
}
// Verifies that a detection carrying labels but no scores is silently
// skipped: only the clip-label stream whose detection has scores ends up
// in the output SequenceExample.
TEST_F(PackMediaSequenceCalculatorTest, PackTwoClipLabels_EmptyScore) {
  SetUpCalculator(
      /*input_streams=*/{"CLIP_LABEL_TEST:test", "CLIP_LABEL_OTHER:test2"},
      /*features=*/{}, /*output_only_if_all_present=*/false,
      /*replace_instead_of_append=*/true);
  auto sequence = ::absl::make_unique<tf::SequenceExample>();

  // Detection with labels but no scores; the calculator ignores it.
  Detection scoreless_detection;
  scoreless_detection.add_label("label_1");
  scoreless_detection.add_label("label_2");
  runner_->MutableInputs()
      ->Tag(kClipLabelTestTag)
      .packets.push_back(
          MakePacket<Detection>(scoreless_detection).At(Timestamp(1)));

  // Fully populated detection: each label paired with a score.
  Detection scored_detection;
  scored_detection.add_label("label_3");
  scored_detection.add_label("label_4");
  scored_detection.add_score(0.3);
  scored_detection.add_score(0.4);
  runner_->MutableInputs()
      ->Tag(kClipLabelOtherTag)
      .packets.push_back(
          MakePacket<Detection>(scored_detection).At(Timestamp(2)));

  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(sequence.release());
  MP_ASSERT_OK(runner_->Run());

  const std::vector<Packet>& output_packets =
      runner_->Outputs().Tag(kSequenceExampleTag).packets;
  ASSERT_EQ(1, output_packets.size());
  const tf::SequenceExample& output_sequence =
      output_packets[0].Get<tf::SequenceExample>();

  // The scoreless detection contributed nothing under "TEST".
  ASSERT_FALSE(mpms::HasClipLabelString("TEST", output_sequence));
  ASSERT_FALSE(mpms::HasClipLabelIndex("TEST", output_sequence));
  ASSERT_FALSE(mpms::HasClipLabelConfidence("TEST", output_sequence));
  // The scored detection is stored in full under "OTHER".
  ASSERT_THAT(mpms::GetClipLabelString("OTHER", output_sequence),
              testing::ElementsAre("label_3", "label_4"));
  ASSERT_FALSE(mpms::HasClipLabelIndex("OTHER", output_sequence));
  ASSERT_THAT(mpms::GetClipLabelConfidence("OTHER", output_sequence),
              testing::ElementsAre(0.3, 0.4));
}
// Verifies that a detection carrying only a score — with neither label
// nor label_id — makes the whole run fail with kInvalidArgument, even
// though the other stream's detection is valid.
TEST_F(PackMediaSequenceCalculatorTest, PackTwoClipLabels_NoLabelOrLabelIndex) {
  SetUpCalculator(
      /*input_streams=*/{"CLIP_LABEL_TEST:test", "CLIP_LABEL_OTHER:test2"},
      /*features=*/{}, /*output_only_if_all_present=*/false,
      /*replace_instead_of_append=*/true);
  auto sequence = ::absl::make_unique<tf::SequenceExample>();

  // Invalid: a score with no label and no label_id attached.
  Detection unlabeled_detection;
  unlabeled_detection.add_score(0.1);
  runner_->MutableInputs()
      ->Tag(kClipLabelTestTag)
      .packets.push_back(
          MakePacket<Detection>(unlabeled_detection).At(Timestamp(1)));

  // Valid detection on the second stream; the run still fails overall.
  Detection valid_detection;
  valid_detection.add_label("label_3");
  valid_detection.add_label("label_4");
  valid_detection.add_score(0.3);
  valid_detection.add_score(0.4);
  runner_->MutableInputs()
      ->Tag(kClipLabelOtherTag)
      .packets.push_back(
          MakePacket<Detection>(valid_detection).At(Timestamp(2)));

  runner_->MutableSidePackets()->Tag(kSequenceExampleTag) =
      Adopt(sequence.release());

  ASSERT_THAT(
      runner_->Run(),
      testing::status::StatusIs(
          absl::StatusCode::kInvalidArgument,
          testing::HasSubstr(
              "detection.label and detection.label_id can't be both empty")));
}
TEST_F(PackMediaSequenceCalculatorTest,
@ -1259,7 +1351,7 @@ TEST_F(PackMediaSequenceCalculatorTest,
/*replace_instead_of_append=*/true);
auto input_sequence = ::absl::make_unique<tf::SequenceExample>();
// 2 labels and 1 label_id in detection_1.
// 2 scores and 1 label_id in detection_1.
Detection detection_1;
detection_1.add_label("label_1");
detection_1.add_label("label_2");
@ -1285,7 +1377,7 @@ TEST_F(PackMediaSequenceCalculatorTest,
testing::status::StatusIs(
absl::StatusCode::kInvalidArgument,
testing::HasSubstr(
"Different size of detection.label_id and detection.label")));
"Different size of detection.label_id and detection.score")));
}
TEST_F(PackMediaSequenceCalculatorTest, ReplaceTwoClipLabels) {

View File

@ -67,8 +67,8 @@ absl::Status FillTimeSeriesHeaderIfValid(const Packet& header_packet,
// -- 1-D or 2-D Tensor
// Output:
// -- Matrix with the same values as the Tensor
// If input tensor is 1 dimensional, the ouput Matrix is of (1xn) shape.
// If input tensor is 2 dimensional (batched), the ouput Matrix is (mxn) shape.
// If input tensor is 1 dimensional, the output Matrix is of (1xn) shape.
// If input tensor is 2 dimensional (batched), the output Matrix is (mxn) shape.
//
// Example Config
// node: {

View File

@ -15,9 +15,9 @@
// Calculator converts from one-dimensional Tensor of DT_FLOAT to vector<float>
// OR from (batched) two-dimensional Tensor of DT_FLOAT to vector<vector<float>.
#include <cstdint>
#include <memory>
#include "absl/base/integral_types.h"
#include "mediapipe/calculators/tensorflow/tensor_to_vector_int_calculator_options.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"

View File

@ -111,8 +111,8 @@ class InferenceState {
// input_side_packet.
//
// The input and output streams are TensorFlow tensors labeled by tags. The tags
// for the streams are matched to feeds and fetchs in a TensorFlow session using
// a named_signature.generic_signature in the ModelManifest. The
// for the streams are matched to feeds and fetches in a TensorFlow session
// using a named_signature.generic_signature in the ModelManifest. The
// generic_signature is used as key-value pairs between the MediaPipe tag and
// the TensorFlow tensor. The signature_name in the options proto determines
// which named_signature is used. The keys in the generic_signature must be
@ -128,7 +128,7 @@ class InferenceState {
// addition. Once batch_size inputs have been provided, the batch will be run
// and the output tensors sent out on the output streams with timestamps
// corresponding to the input stream packets. Setting the batch_size to 1
// completely disables batching, but is indepdent of add_batch_dim_to_tensors.
// completely disables batching, but is independent of add_batch_dim_to_tensors.
//
// The TensorFlowInferenceCalculator also support feeding states recurrently for
// RNNs and LSTMs. Simply set the recurrent_tag_pair options to define the

View File

@ -42,7 +42,7 @@ message TensorFlowInferenceCalculatorOptions {
// If the 0th dimension is the batch dimension, then the tensors are
// concatenated on that dimension. If the 0th is a data dimension, then a 0th
// dimension is added before concatenating. If added, the extra dimension is
// removed before outputing the tensor. Examples of each case: If you want
// removed before outputting the tensor. Examples of each case: If you want
// to batch spectra of audio over time for an LSTM, a time-frequency
// representation has a 0th dimension as the batch dimension. If you want to
// batch frames of video that are [width, height, channels], the batch

View File

@ -1,2 +1,2 @@
The model files add.bin, add_quantized.bin
(and corresponding metatada json files) come from tensorflow/lite/testdata/
(and corresponding metadata json files) come from tensorflow/lite/testdata/

View File

@ -95,7 +95,7 @@ struct GPUData {
// into a TfLiteTensor (float 32) or a GpuBuffer to a tflite::gpu::GlBuffer
// or MTLBuffer.
//
// This calculator is designed to be used with the TfLiteInferenceCalcualtor,
// This calculator is designed to be used with the TfLiteInferenceCalculator,
// as a pre-processing step for calculator inputs.
//
// IMAGE and IMAGE_GPU inputs are normalized to [-1,1] (default) or [0,1],

View File

@ -31,7 +31,7 @@ message TfLiteConverterCalculatorOptions {
// Custom settings to override the internal scaling factors `div` and `sub`.
// Both values must be set to non-negative values. Will only take effect on
// CPU AND when |use_custom_normalization| is set to true. When these custom
// values take effect, the |zero_center| setting above will be overriden, and
// values take effect, the |zero_center| setting above will be overridden, and
// the normalized_value will be calculated as:
// normalized_value = input / custom_div - custom_sub.
optional bool use_custom_normalization = 6 [default = false];

View File

@ -489,8 +489,8 @@ absl::Status TfLiteInferenceCalculator::WriteKernelsToFile() {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
if (use_kernel_caching_) {
// Save kernel file.
ASSIGN_OR_RETURN(std::vector<uint8_t> kernel_cache,
tflite_gpu_runner_->GetSerializedBinaryCache());
MP_ASSIGN_OR_RETURN(std::vector<uint8_t> kernel_cache,
tflite_gpu_runner_->GetSerializedBinaryCache());
std::string cache_str(kernel_cache.begin(), kernel_cache.end());
MP_RETURN_IF_ERROR(
mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
@ -733,7 +733,7 @@ absl::Status TfLiteInferenceCalculator::ReadKernelsFromFile() {
absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
CalculatorContext* cc) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
MP_ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
const auto& model = *model_packet_.Get<TfLiteModelPtr>();
tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates
@ -817,8 +817,8 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
gpu_data_out_.resize(tflite_gpu_runner_->outputs_size());
for (int i = 0; i < tflite_gpu_runner_->outputs_size(); ++i) {
gpu_data_out_[i] = absl::make_unique<GPUData>();
ASSIGN_OR_RETURN(gpu_data_out_[i]->elements,
tflite_gpu_runner_->GetOutputElements(i));
MP_ASSIGN_OR_RETURN(gpu_data_out_[i]->elements,
tflite_gpu_runner_->GetOutputElements(i));
// Create and bind input buffer.
MP_RETURN_IF_ERROR(
::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer<float>(
@ -839,7 +839,7 @@ absl::Status TfLiteInferenceCalculator::LoadModel(CalculatorContext* cc) {
return absl::OkStatus();
}
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
MP_ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
const auto& model = *model_packet_.Get<TfLiteModelPtr>();
tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates

View File

@ -101,8 +101,8 @@ absl::Status TfLiteTensorsToClassificationCalculator::Open(
top_k_ = options_.top_k();
if (options_.has_label_map_path()) {
std::string string_path;
ASSIGN_OR_RETURN(string_path,
PathToResourceAsFile(options_.label_map_path()));
MP_ASSIGN_OR_RETURN(string_path,
PathToResourceAsFile(options_.label_map_path()));
std::string label_map_string;
MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string));

View File

@ -25,7 +25,7 @@ message TfLiteTensorsToClassificationCalculatorOptions {
optional TfLiteTensorsToClassificationCalculatorOptions ext = 266399463;
}
// Score threshold for perserving the class.
// Score threshold for preserving the class.
optional float min_score_threshold = 1;
// Number of highest scoring labels to output. If top_k is not positive then
// all labels are used.

View File

@ -116,7 +116,7 @@ void ConvertAnchorsToRawValues(const std::vector<Anchor>& anchors,
// tensors can have 2 or 3 tensors. First tensor is the predicted
// raw boxes/keypoints. The size of the values must be (num_boxes
// * num_predicted_values). Second tensor is the score tensor. The
// size of the valuse must be (num_boxes * num_classes). It's
// size of the values must be (num_boxes * num_classes). It's
// optional to pass in a third tensor for anchors (e.g. for SSD
// models) depend on the outputs of the detection model. The size
// of anchor tensor must be (num_boxes * 4).

View File

@ -69,6 +69,6 @@ message TfLiteTensorsToDetectionsCalculatorOptions {
// representation has a bottom-left origin (e.g., in OpenGL).
optional bool flip_vertically = 18 [default = false];
// Score threshold for perserving decoded detections.
// Score threshold for preserving decoded detections.
optional float min_score_thresh = 19;
}

Some files were not shown because too many files have changed in this diff Show More