Project import generated by Copybara.
GitOrigin-RevId: 33adfdf31f3a5cbf9edc07ee1ea583e95080bdc5
Parent: b544a314b3
Commit: 139237092f
@@ -8,6 +8,7 @@ include README.md
 include requirements.txt

 recursive-include mediapipe/modules *.tflite *.txt *.binarypb
+exclude mediapipe/modules/face_detection/face_detection_full_range.tflite
 exclude mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite
 exclude mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite
 exclude mediapipe/modules/objectron/object_detection_3d_sneakers.tflite
README.md (52 changed lines)
@@ -55,46 +55,22 @@ See also
 [MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
 for ML models released in MediaPipe.

-## MediaPipe in Python
-
-MediaPipe offers customizable Python solutions as a prebuilt Python package on
-[PyPI](https://pypi.org/project/mediapipe/), which can be installed simply with
-`pip install mediapipe`. It also provides tools for users to build their own
-solutions. Please see
-[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python)
-for more info.
-
-## MediaPipe on the Web
-
-MediaPipe on the Web is an effort to run the same ML solutions built for mobile
-and desktop also in web browsers. The official API is under construction, but
-the core technology has been proven effective. Please see
-[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
-in Google Developers Blog for details.
-
-You can use the following links to load a demo in the MediaPipe Visualizer, and
-over there click the "Runner" icon in the top bar like shown below. The demos
-use your webcam video as input, which is processed all locally in real-time and
-never leaves your device.
-
-![visualizer_runner](docs/images/visualizer_runner.png)
-
-* [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection)
-* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking)
-* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
-* [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking)
-* [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection)
-* [MediaPipe Pose](https://viz.mediapipe.dev/demo/pose_tracking)
-* [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation)
-
 ## Getting started

-Learn how to [install](https://google.github.io/mediapipe/getting_started/install)
-MediaPipe and
-[build example applications](https://google.github.io/mediapipe/getting_started/building_examples),
-and start exploring our ready-to-use
-[solutions](https://google.github.io/mediapipe/solutions/solutions) that you can
-further extend and customize.
+To start using MediaPipe
+[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few
+lines of code, see example code and demos in
+[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and
+[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript).
+
+To use MediaPipe in C++, Android and iOS, which allow further customization of
+the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as
+building your own, learn how to
+[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and
+start building example applications in
+[C++](https://google.github.io/mediapipe/getting_started/cpp),
+[Android](https://google.github.io/mediapipe/getting_started/android) and
+[iOS](https://google.github.io/mediapipe/getting_started/ios).

 The source code is hosted in the
 [MediaPipe Github repository](https://github.com/google/mediapipe), and you can
@@ -351,8 +351,8 @@ maven_install(
         "androidx.test.espresso:espresso-core:3.1.1",
         "com.github.bumptech.glide:glide:4.11.0",
         "com.google.android.material:material:aar:1.0.0-rc01",
-        "com.google.auto.value:auto-value:1.6.4",
-        "com.google.auto.value:auto-value-annotations:1.6.4",
+        "com.google.auto.value:auto-value:1.8.1",
+        "com.google.auto.value:auto-value-annotations:1.8.1",
         "com.google.code.findbugs:jsr305:3.0.2",
         "com.google.flogger:flogger-system-backend:0.3.1",
         "com.google.flogger:flogger:0.3.1",
@@ -92,12 +92,12 @@ each project.
 and copy
 [the binary graph](https://github.com/google/mediapipe/blob/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD#L41)
 and
-[the face detection tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite).
+[the face detection tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range.tflite).

 ```bash
 bazel build -c opt mediapipe/graphs/face_detection:face_detection_mobile_gpu_binary_graph
 cp bazel-bin/mediapipe/graphs/face_detection/face_detection_mobile_gpu.binarypb /path/to/your/app/src/main/assets/
-cp mediapipe/modules/face_detection/face_detection_front.tflite /path/to/your/app/src/main/assets/
+cp mediapipe/modules/face_detection/face_detection_short_range.tflite /path/to/your/app/src/main/assets/
 ```

 ![Screenshot](../images/mobile/assets_location.png)

@@ -117,7 +117,6 @@ each project.
     implementation 'com.google.flogger:flogger-system-backend:0.3.1'
     implementation 'com.google.code.findbugs:jsr305:3.0.2'
     implementation 'com.google.guava:guava:27.0.1-android'
-    implementation 'com.google.guava:guava:27.0.1-android'
     implementation 'com.google.protobuf:protobuf-java:3.11.4'
     // CameraX core library
     def camerax_version = "1.0.0-beta10"
@@ -125,7 +124,7 @@ each project.
     implementation "androidx.camera:camera-camera2:$camerax_version"
     implementation "androidx.camera:camera-lifecycle:$camerax_version"
     // AutoValue
-    def auto_value_version = "1.6.4"
+    def auto_value_version = "1.8.1"
     implementation "com.google.auto.value:auto-value-annotations:$auto_value_version"
     annotationProcessor "com.google.auto.value:auto-value:$auto_value_version"
 }
docs/images/mobile/pose_world_landmarks.mp4 (new binary file, not shown)
@@ -55,46 +55,22 @@ See also
 [MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
 for ML models released in MediaPipe.

-## MediaPipe in Python
-
-MediaPipe offers customizable Python solutions as a prebuilt Python package on
-[PyPI](https://pypi.org/project/mediapipe/), which can be installed simply with
-`pip install mediapipe`. It also provides tools for users to build their own
-solutions. Please see
-[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python)
-for more info.
-
-## MediaPipe on the Web
-
-MediaPipe on the Web is an effort to run the same ML solutions built for mobile
-and desktop also in web browsers. The official API is under construction, but
-the core technology has been proven effective. Please see
-[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
-in Google Developers Blog for details.
-
-You can use the following links to load a demo in the MediaPipe Visualizer, and
-over there click the "Runner" icon in the top bar like shown below. The demos
-use your webcam video as input, which is processed all locally in real-time and
-never leaves your device.
-
-![visualizer_runner](images/visualizer_runner.png)
-
-* [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection)
-* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking)
-* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
-* [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking)
-* [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection)
-* [MediaPipe Pose](https://viz.mediapipe.dev/demo/pose_tracking)
-* [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation)
-
 ## Getting started

-Learn how to [install](https://google.github.io/mediapipe/getting_started/install)
-MediaPipe and
-[build example applications](https://google.github.io/mediapipe/getting_started/building_examples),
-and start exploring our ready-to-use
-[solutions](https://google.github.io/mediapipe/solutions/solutions) that you can
-further extend and customize.
+To start using MediaPipe
+[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few
+lines of code, see example code and demos in
+[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and
+[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript).
+
+To use MediaPipe in C++, Android and iOS, which allow further customization of
+the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as
+building your own, learn how to
+[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and
+start building example applications in
+[C++](https://google.github.io/mediapipe/getting_started/cpp),
+[Android](https://google.github.io/mediapipe/getting_started/android) and
+[iOS](https://google.github.io/mediapipe/getting_started/ios).

 The source code is hosted in the
 [MediaPipe Github repository](https://github.com/google/mediapipe), and you can
@@ -45,6 +45,15 @@ section.

 Naming style and availability may differ slightly across platforms/languages.

+#### model_selection
+
+An integer index `0` or `1`. Use `0` to select a short-range model that works
+best for faces within 2 meters from the camera, and `1` for a full-range model
+best for faces within 5 meters. For the full-range option, a sparse model is
+used for its improved inference speed. Please refer to the
+[model cards](./models.md#face_detection) for details. Defaults to `0` if not
+specified.
+
 #### min_detection_confidence

 Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the
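To make the new option concrete, here is a minimal Python sketch of passing `model_selection` to the Solution API. It assumes the `mediapipe` and `numpy` PyPI packages and uses a placeholder frame; it is illustrative and not taken from this commit.

```python
import mediapipe as mp
import numpy as np

mp_face_detection = mp.solutions.face_detection

# Placeholder RGB frame; substitute a real image (H x W x 3, uint8, RGB order).
rgb_image = np.zeros((480, 640, 3), dtype=np.uint8)

# model_selection=0: short-range model, faces within roughly 2 meters.
# model_selection=1: full-range (sparse) model, faces within roughly 5 meters.
with mp_face_detection.FaceDetection(
    model_selection=1, min_detection_confidence=0.5) as face_detection:
  results = face_detection.process(rgb_image)
  if results.detections:
    print(f'Detected {len(results.detections)} face(s)')
```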
@@ -72,6 +81,7 @@ install MediaPipe Python package, then learn more in the companion

 Supported configuration options:

+* [model_selection](#model_selection)
 * [min_detection_confidence](#min_detection_confidence)

 ```python
@@ -83,7 +93,7 @@ mp_drawing = mp.solutions.drawing_utils
 # For static images:
 IMAGE_FILES = []
 with mp_face_detection.FaceDetection(
-    min_detection_confidence=0.5) as face_detection:
+    model_selection=1, min_detection_confidence=0.5) as face_detection:
   for idx, file in enumerate(IMAGE_FILES):
     image = cv2.imread(file)
     # Convert the BGR image to RGB and process it with MediaPipe Face Detection.
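The diff context stops at the conversion comment; as a rough continuation of the same snippet (a sketch of the usual pattern, not copied from this commit), the step it refers to typically looks like:

```python
    # Convert the BGR image (OpenCV's default channel order) to RGB and run detection.
    results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if not results.detections:
      continue
```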
@@ -103,7 +113,7 @@ with mp_face_detection.FaceDetection(
 # For webcam input:
 cap = cv2.VideoCapture(0)
 with mp_face_detection.FaceDetection(
-    min_detection_confidence=0.5) as face_detection:
+    model_selection=0, min_detection_confidence=0.5) as face_detection:
   while cap.isOpened():
     success, image = cap.read()
     if not success:
@@ -139,6 +149,7 @@ and the following usage example.

 Supported configuration options:

+* [modelSelection](#model_selection)
 * [minDetectionConfidence](#min_detection_confidence)

 ```html
@@ -189,6 +200,7 @@ const faceDetection = new FaceDetection({locateFile: (file) => {
   return `https://cdn.jsdelivr.net/npm/@mediapipe/face_detection@0.0/${file}`;
 }});
 faceDetection.setOptions({
+  modelSelection: 0,
   minDetectionConfidence: 0.5
 });
 faceDetection.onResults(onResults);
|
@ -255,10 +267,6 @@ same configuration as the GPU pipeline, runs entirely on CPU.
|
||||||
* Target:
|
* Target:
|
||||||
[`mediapipe/examples/desktop/face_detection:face_detection_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/face_detection/BUILD)
|
[`mediapipe/examples/desktop/face_detection:face_detection_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/face_detection/BUILD)
|
||||||
|
|
||||||
### Web
|
|
||||||
|
|
||||||
Please refer to [these instructions](../index.md#mediapipe-on-the-web).
|
|
||||||
|
|
||||||
### Coral
|
### Coral
|
||||||
|
|
||||||
Please refer to
|
Please refer to
|
||||||
|
|
|
@@ -69,7 +69,7 @@ and renders using a dedicated
 The
 [face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
 internally uses a
-[face_detection_subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)
+[face_detection_subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)
 from the
 [face detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection).

@@ -51,7 +51,14 @@ to visualize its associated subgraphs, please see

 ### Web

-Please refer to [these instructions](../index.md#mediapipe-on-the-web).
+Use [this link](https://viz.mediapipe.dev/demo/hair_segmentation) to load a demo
+in the MediaPipe Visualizer, and over there click the "Runner" icon in the top
+bar like shown below. The demos use your webcam video as input, which is
+processed all locally in real-time and never leaves your device. Please see
+[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
+in Google Developers Blog for details.
+
+![visualizer_runner](../images/visualizer_runner.png)

 ## Resources

@@ -176,6 +176,16 @@ A list of pose landmarks. Each landmark consists of the following:
 * `visibility`: A value in `[0.0, 1.0]` indicating the likelihood of the
   landmark being visible (present and not occluded) in the image.

+#### pose_world_landmarks
+
+Another list of pose landmarks in world coordinates. Each landmark consists of
+the following:
+
+* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
+  center between hips.
+* `visibility`: Identical to that defined in the corresponding
+  [pose_landmarks](#pose_landmarks).
+
 #### face_landmarks

 A list of 468 face landmarks. Each landmark consists of `x`, `y` and `z`. `x`
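To illustrate the coordinate convention described above, a minimal Python sketch of reading the world landmarks from a `results` object (assumes the `mediapipe` and `numpy` PyPI packages and a placeholder frame; illustrative only, not part of this change):

```python
import mediapipe as mp
import numpy as np

rgb_image = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder; use a real RGB frame

with mp.solutions.holistic.Holistic(static_image_mode=True) as holistic:
  results = holistic.process(rgb_image)
  if results.pose_world_landmarks:
    for i, lm in enumerate(results.pose_world_landmarks.landmark):
      # x, y, z are in meters with the origin at the center between the hips.
      print(f'{i}: ({lm.x:+.3f}, {lm.y:+.3f}, {lm.z:+.3f}) m, visibility={lm.visibility:.2f}')
```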
@@ -245,6 +255,9 @@ with mp_holistic.Holistic(
     mp_drawing.draw_landmarks(
         annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
     cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
+    # Plot pose world landmarks.
+    mp_drawing.plot_landmarks(
+        results.pose_world_landmarks, mp_holistic.POSE_CONNECTIONS)

 # For webcam input:
 cap = cv2.VideoCapture(0)
@@ -69,7 +69,7 @@ and renders using a dedicated
 The
 [face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
 internally uses a
-[face detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)
+[face detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)
 from the
 [face detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection).

@@ -193,7 +193,17 @@ on how to build MediaPipe examples.

 ### Web

-Please refer to [these instructions](../index.md#mediapipe-on-the-web).
+You can use the following links to load a demo in the MediaPipe Visualizer, and
+over there click the "Runner" icon in the top bar like shown below. The demos
+use your webcam video as input, which is processed all locally in real-time and
+never leaves your device. Please see
+[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
+in Google Developers Blog for details.
+
+![visualizer_runner](../images/visualizer_runner.png)
+
+* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking)
+* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)

 ## Resources

@@ -14,17 +14,27 @@ nav_order: 30

 ### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)

-* Face detection model for front-facing/selfie camera:
-  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite),
+* Short-range model (best for faces within 2 meters from the camera):
+  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range.tflite),
   [TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/face-detector-quantized_edgetpu.tflite),
   [Model card](https://mediapipe.page.link/blazeface-mc)
-* Face detection model for back-facing camera:
-  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_back.tflite),
+* Full-range model (dense, best for faces within 5 meters from the camera):
+  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range.tflite),
   [Model card](https://mediapipe.page.link/blazeface-back-mc)
-* Face detection model for back-facing camera (sparse):
-  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_back_sparse.tflite),
+* Full-range model (sparse, best for faces within 5 meters from the camera):
+  [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite),
   [Model card](https://mediapipe.page.link/blazeface-back-sparse-mc)

+The full-range dense and sparse models have the same quality in terms of
+[F-score](https://en.wikipedia.org/wiki/F-score), but differ in the underlying
+metrics: the dense model is slightly better in
+[Recall](https://en.wikipedia.org/wiki/Precision_and_recall), whereas the sparse
+model outperforms the dense one in
+[Precision](https://en.wikipedia.org/wiki/Precision_and_recall). Speed-wise, the
+sparse model is ~30% faster when executing on CPU via
+[XNNPACK](https://github.com/google/XNNPACK), whereas on GPU the models
+demonstrate comparable latencies. Depending on your application, you may prefer
+one over the other.
+
 ### [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh)

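As a quick reminder of how the metric mentioned above ties Precision and Recall together (standard definition, not specific to this commit), the F-score is their harmonic mean:

```python
def f_score(precision, recall):
  """F1 score: the harmonic mean of precision and recall."""
  if precision + recall == 0.0:
    return 0.0
  return 2.0 * precision * recall / (precision + recall)

# Two made-up operating points with the same F1 but opposite trade-offs,
# loosely mirroring the dense-vs-sparse comparison above.
print(f_score(0.90, 0.80))  # ~0.847
print(f_score(0.80, 0.90))  # ~0.847
```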
@@ -194,10 +194,23 @@ A list of pose landmarks. Each landmark consists of the following:
 * `z`: Represents the landmark depth with the depth at the midpoint of hips
   being the origin, and the smaller the value the closer the landmark is to
   the camera. The magnitude of `z` uses roughly the same scale as `x`.

 * `visibility`: A value in `[0.0, 1.0]` indicating the likelihood of the
   landmark being visible (present and not occluded) in the image.

+#### pose_world_landmarks
+
+*Fig 5. Example of MediaPipe Pose real-world 3D coordinates.* |
+:-----------------------------------------------------------: |
+<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_world_landmarks.mp4" type="video/mp4"></video> |
+
+Another list of pose landmarks in world coordinates. Each landmark consists of
+the following:
+
+* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
+  center between hips.
+* `visibility`: Identical to that defined in the corresponding
+  [pose_landmarks](#pose_landmarks).
+
 ### Python Solution API

 Please first follow general [instructions](../getting_started/python.md) to
@@ -242,6 +255,9 @@ with mp_pose.Pose(
     mp_drawing.draw_landmarks(
         annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
     cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
+    # Plot pose world landmarks.
+    mp_drawing.plot_landmarks(
+        results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)

 # For webcam input:
 cap = cv2.VideoCapture(0)
@@ -294,6 +310,7 @@ Supported configuration options:
   <meta charset="utf-8">
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/pose/pose.js" crossorigin="anonymous"></script>
 </head>
@@ -312,8 +329,15 @@ Supported configuration options:
 const videoElement = document.getElementsByClassName('input_video')[0];
 const canvasElement = document.getElementsByClassName('output_canvas')[0];
 const canvasCtx = canvasElement.getContext('2d');
+const landmarkContainer = document.getElementsByClassName('landmark-grid-container')[0];
+const grid = new LandmarkGrid(landmarkContainer);

 function onResults(results) {
+  if (!results.poseLandmarks) {
+    grid.updateLandmarks([]);
+    return;
+  }
+
   canvasCtx.save();
   canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
   canvasCtx.drawImage(
@@ -323,6 +347,8 @@ function onResults(results) {
   drawLandmarks(canvasCtx, results.poseLandmarks,
                 {color: '#FF0000', lineWidth: 2});
   canvasCtx.restore();
+
+  grid.updateLandmarks(results.poseWorldLandmarks);
 }

 const pose = new Pose({locateFile: (file) => {
@@ -933,8 +933,8 @@ cc_test(
 )

 cc_library(
-    name = "split_normalized_landmark_list_calculator",
-    srcs = ["split_normalized_landmark_list_calculator.cc"],
+    name = "split_landmarks_calculator",
+    srcs = ["split_landmarks_calculator.cc"],
     visibility = ["//visibility:public"],
     deps = [
         ":split_vector_calculator_cc_proto",
@@ -948,10 +948,10 @@ cc_library(
 )

 cc_test(
-    name = "split_normalized_landmark_list_calculator_test",
-    srcs = ["split_normalized_landmark_list_calculator_test.cc"],
+    name = "split_landmarks_calculator_test",
+    srcs = ["split_landmarks_calculator_test.cc"],
     deps = [
-        ":split_normalized_landmark_list_calculator",
+        ":split_landmarks_calculator",
         ":split_vector_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework:calculator_runner",
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_  // NOLINT
-#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_  // NOLINT
+#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_  // NOLINT
+#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_  // NOLINT

 #include "mediapipe/calculators/core/split_vector_calculator.pb.h"
 #include "mediapipe/framework/calculator_framework.h"
@@ -24,29 +24,30 @@

 namespace mediapipe {

-// Splits an input packet with NormalizedLandmarkList into
-// multiple NormalizedLandmarkList output packets using the [begin, end) ranges
+// Splits an input packet with LandmarkListType into
+// multiple LandmarkListType output packets using the [begin, end) ranges
 // specified in SplitVectorCalculatorOptions. If the option "element_only" is
 // set to true, all ranges should be of size 1 and all outputs will be elements
-// of type NormalizedLandmark. If "element_only" is false, ranges can be
-// non-zero in size and all outputs will be of type NormalizedLandmarkList.
+// of type LandmarkType. If "element_only" is false, ranges can be
+// non-zero in size and all outputs will be of type LandmarkListType.
 // If the option "combine_outputs" is set to true, only one output stream can be
 // specified and all ranges of elements will be combined into one
-// NormalizedLandmarkList.
-class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
+// LandmarkListType.
+template <typename LandmarkType, typename LandmarkListType>
+class SplitLandmarksCalculator : public CalculatorBase {
  public:
   static absl::Status GetContract(CalculatorContract* cc) {
     RET_CHECK(cc->Inputs().NumEntries() == 1);
     RET_CHECK(cc->Outputs().NumEntries() != 0);

-    cc->Inputs().Index(0).Set<NormalizedLandmarkList>();
+    cc->Inputs().Index(0).Set<LandmarkListType>();

     const auto& options =
         cc->Options<::mediapipe::SplitVectorCalculatorOptions>();

     if (options.combine_outputs()) {
       RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
-      cc->Outputs().Index(0).Set<NormalizedLandmarkList>();
+      cc->Outputs().Index(0).Set<LandmarkListType>();
       for (int i = 0; i < options.ranges_size() - 1; ++i) {
         for (int j = i + 1; j < options.ranges_size(); ++j) {
           const auto& range_0 = options.ranges(i);
@@ -81,9 +82,9 @@ class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
           return absl::InvalidArgumentError(
               "Since element_only is true, all ranges should be of size 1.");
         }
-        cc->Outputs().Index(i).Set<NormalizedLandmark>();
+        cc->Outputs().Index(i).Set<LandmarkType>();
       } else {
-        cc->Outputs().Index(i).Set<NormalizedLandmarkList>();
+        cc->Outputs().Index(i).Set<LandmarkListType>();
       }
     }
   }
@@ -110,40 +111,39 @@ class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
   }

   absl::Status Process(CalculatorContext* cc) override {
-    const NormalizedLandmarkList& input =
-        cc->Inputs().Index(0).Get<NormalizedLandmarkList>();
+    const LandmarkListType& input =
+        cc->Inputs().Index(0).Get<LandmarkListType>();
     RET_CHECK_GE(input.landmark_size(), max_range_end_)
         << "Max range end " << max_range_end_ << " exceeds landmarks size "
         << input.landmark_size();

     if (combine_outputs_) {
-      NormalizedLandmarkList output;
+      LandmarkListType output;
       for (int i = 0; i < ranges_.size(); ++i) {
         for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
-          const NormalizedLandmark& input_landmark = input.landmark(j);
+          const LandmarkType& input_landmark = input.landmark(j);
           *output.add_landmark() = input_landmark;
         }
       }
       RET_CHECK_EQ(output.landmark_size(), total_elements_);
       cc->Outputs().Index(0).AddPacket(
-          MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp()));
+          MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
     } else {
       if (element_only_) {
         for (int i = 0; i < ranges_.size(); ++i) {
           cc->Outputs().Index(i).AddPacket(
-              MakePacket<NormalizedLandmark>(input.landmark(ranges_[i].first))
+              MakePacket<LandmarkType>(input.landmark(ranges_[i].first))
                   .At(cc->InputTimestamp()));
         }
       } else {
         for (int i = 0; i < ranges_.size(); ++i) {
-          NormalizedLandmarkList output;
+          LandmarkListType output;
           for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
-            const NormalizedLandmark& input_landmark = input.landmark(j);
+            const LandmarkType& input_landmark = input.landmark(j);
             *output.add_landmark() = input_landmark;
           }
           cc->Outputs().Index(i).AddPacket(
-              MakePacket<NormalizedLandmarkList>(output).At(
-                  cc->InputTimestamp()));
+              MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
         }
       }
     }
@@ -159,9 +159,15 @@ class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
   bool combine_outputs_ = false;
 };

+typedef SplitLandmarksCalculator<NormalizedLandmark, NormalizedLandmarkList>
+    SplitNormalizedLandmarkListCalculator;
 REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);
+
+typedef SplitLandmarksCalculator<Landmark, LandmarkList>
+    SplitLandmarkListCalculator;
+REGISTER_CALCULATOR(SplitLandmarkListCalculator);

 }  // namespace mediapipe

 // NOLINTNEXTLINE
-#endif  // MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_
+#endif  // MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_
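The comment block in the header above describes the splitting semantics in terms of [begin, end) ranges, `element_only`, and `combine_outputs`. A rough Python sketch of the same behavior on a plain list of landmarks (illustrative analogue only, not the MediaPipe API):

```python
def split_landmarks(landmarks, ranges, element_only=False, combine_outputs=False):
  """Mimics the SplitLandmarksCalculator range semantics on a plain Python list."""
  if combine_outputs:
    # Single output: all [begin, end) ranges concatenated into one list.
    return [lm for begin, end in ranges for lm in landmarks[begin:end]]
  if element_only:
    # One output per range; each range must cover exactly one element.
    assert all(end - begin == 1 for begin, end in ranges)
    return [landmarks[begin] for begin, _ in ranges]
  # One output list per [begin, end) range.
  return [landmarks[begin:end] for begin, end in ranges]

print(split_landmarks(list('abcdef'), [(0, 2), (4, 6)]))                         # [['a', 'b'], ['e', 'f']]
print(split_landmarks(list('abcdef'), [(0, 2), (4, 6)], combine_outputs=True))   # ['a', 'b', 'e', 'f']
```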
@@ -80,6 +80,16 @@ mediapipe_proto_library(
     ],
 )

+mediapipe_proto_library(
+    name = "segmentation_smoothing_calculator_proto",
+    srcs = ["segmentation_smoothing_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+    ],
+)
+
 cc_library(
     name = "color_convert_calculator",
     srcs = ["color_convert_calculator.cc"],
@@ -602,3 +612,52 @@ cc_test(
         "//mediapipe/framework/port:parse_text_proto",
     ],
 )
+
+cc_library(
+    name = "segmentation_smoothing_calculator",
+    srcs = ["segmentation_smoothing_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":segmentation_smoothing_calculator_cc_proto",
+        "//mediapipe/framework:calculator_options_cc_proto",
+        "//mediapipe/framework/formats:image_format_cc_proto",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:image",
+        "//mediapipe/framework/formats:image_opencv",
+        "//mediapipe/framework/port:logging",
+        "//mediapipe/framework/port:opencv_core",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/framework/port:vector",
+    ] + select({
+        "//mediapipe/gpu:disable_gpu": [],
+        "//conditions:default": [
+            "//mediapipe/gpu:gl_calculator_helper",
+            "//mediapipe/gpu:gl_simple_shaders",
+            "//mediapipe/gpu:gl_quad_renderer",
+            "//mediapipe/gpu:shader_util",
+        ],
+    }),
+    alwayslink = 1,
+)
+
+cc_test(
+    name = "segmentation_smoothing_calculator_test",
+    srcs = ["segmentation_smoothing_calculator_test.cc"],
+    deps = [
+        ":image_clone_calculator",
+        ":image_clone_calculator_cc_proto",
+        ":segmentation_smoothing_calculator",
+        ":segmentation_smoothing_calculator_cc_proto",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:calculator_runner",
+        "//mediapipe/framework/deps:file_path",
+        "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/formats:image_opencv",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/framework/port:opencv_imgcodecs",
+        "//mediapipe/framework/port:opencv_imgproc",
+        "//mediapipe/framework/port:parse_text_proto",
+    ],
+)
mediapipe/calculators/image/segmentation_smoothing_calculator.cc (new file, 429 lines)
@@ -0,0 +1,429 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <memory>

#include "mediapipe/calculators/image/segmentation_smoothing_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/vector.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/shader_util.h"
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace mediapipe {

namespace {
constexpr char kCurrentMaskTag[] = "MASK";
constexpr char kPreviousMaskTag[] = "MASK_PREVIOUS";
constexpr char kOutputMaskTag[] = "MASK_SMOOTHED";

enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
}  // namespace

// A calculator for mixing two segmentation masks together,
// based on an uncertainty probability estimate.
//
// Inputs:
//   MASK - Image containing the new/current mask.
//          [ImageFormat::VEC32F1, or
//           GpuBufferFormat::kBGRA32/kRGB24/kGrayHalf16/kGrayFloat32]
//   MASK_PREVIOUS - Image containing previous mask.
//                   [Same format as MASK_CURRENT]
//   * If input channels is >1, only the first channel (R) is used as the mask.
//
// Output:
//   MASK_SMOOTHED - Blended mask.
//                   [Same format as MASK_CURRENT]
//   * The resulting filtered mask will be stored in R channel,
//     and duplicated in A if 4 channels.
//
// Options:
//   combine_with_previous_ratio - Amount of previous to blend with current.
//
// Example:
//  node {
//    calculator: "SegmentationSmoothingCalculator"
//    input_stream: "MASK:mask"
//    input_stream: "MASK_PREVIOUS:mask_previous"
//    output_stream: "MASK_SMOOTHED:mask_smoothed"
//    options: {
//      [mediapipe.SegmentationSmoothingCalculatorOptions.ext] {
//        combine_with_previous_ratio: 0.9
//      }
//    }
//  }
//
class SegmentationSmoothingCalculator : public CalculatorBase {
 public:
  SegmentationSmoothingCalculator() = default;

  static absl::Status GetContract(CalculatorContract* cc);

  // From Calculator.
  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Process(CalculatorContext* cc) override;
  absl::Status Close(CalculatorContext* cc) override;

 private:
  absl::Status RenderGpu(CalculatorContext* cc);
  absl::Status RenderCpu(CalculatorContext* cc);

  absl::Status GlSetup(CalculatorContext* cc);
  void GlRender(CalculatorContext* cc);

  float combine_with_previous_ratio_;

  bool gpu_initialized_ = false;
#if !MEDIAPIPE_DISABLE_GPU
  mediapipe::GlCalculatorHelper gpu_helper_;
  GLuint program_ = 0;
#endif  // !MEDIAPIPE_DISABLE_GPU
};
REGISTER_CALCULATOR(SegmentationSmoothingCalculator);

absl::Status SegmentationSmoothingCalculator::GetContract(
    CalculatorContract* cc) {
  CHECK_GE(cc->Inputs().NumEntries(), 1);

  cc->Inputs().Tag(kCurrentMaskTag).Set<Image>();
  cc->Inputs().Tag(kPreviousMaskTag).Set<Image>();
  cc->Outputs().Tag(kOutputMaskTag).Set<Image>();

#if !MEDIAPIPE_DISABLE_GPU
  MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

absl::Status SegmentationSmoothingCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  auto options =
      cc->Options<mediapipe::SegmentationSmoothingCalculatorOptions>();
  combine_with_previous_ratio_ = options.combine_with_previous_ratio();

#if !MEDIAPIPE_DISABLE_GPU
  MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

absl::Status SegmentationSmoothingCalculator::Process(CalculatorContext* cc) {
  if (cc->Inputs().Tag(kCurrentMaskTag).IsEmpty()) {
    return absl::OkStatus();
  }
  if (cc->Inputs().Tag(kPreviousMaskTag).IsEmpty()) {
    // Pass through current image if previous is not available.
    cc->Outputs()
        .Tag(kOutputMaskTag)
        .AddPacket(cc->Inputs().Tag(kCurrentMaskTag).Value());
    return absl::OkStatus();
  }

  // Run on GPU if incoming data is on GPU.
  const bool use_gpu = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>().UsesGpu();

  if (use_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
    MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
      if (!gpu_initialized_) {
        MP_RETURN_IF_ERROR(GlSetup(cc));
        gpu_initialized_ = true;
      }
      MP_RETURN_IF_ERROR(RenderGpu(cc));
      return absl::OkStatus();
    }));
#else
    return absl::InternalError("GPU processing is disabled.");
#endif  // !MEDIAPIPE_DISABLE_GPU
  } else {
    MP_RETURN_IF_ERROR(RenderCpu(cc));
  }

  return absl::OkStatus();
}

absl::Status SegmentationSmoothingCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  gpu_helper_.RunInGlContext([this] {
    if (program_) glDeleteProgram(program_);
    program_ = 0;
  });
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}

absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
  // Setup source images.
  const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
  const cv::Mat current_mat = mediapipe::formats::MatView(&current_frame);
  RET_CHECK_EQ(current_mat.type(), CV_32FC1)
      << "Only 1-channel float input image is supported.";

  const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
  const cv::Mat previous_mat = mediapipe::formats::MatView(&previous_frame);
  RET_CHECK_EQ(previous_mat.type(), current_mat.type())
      << "Warning: mixing input format types: " << previous_mat.type()
      << " != " << previous_mat.type();

  RET_CHECK_EQ(current_mat.rows, previous_mat.rows);
  RET_CHECK_EQ(current_mat.cols, previous_mat.cols);

  // Setup destination image.
  auto output_frame = std::make_shared<ImageFrame>(
      current_frame.image_format(), current_mat.cols, current_mat.rows);
  cv::Mat output_mat = mediapipe::formats::MatView(output_frame.get());
  output_mat.setTo(cv::Scalar(0));

  // Blending function.
  const auto blending_fn = [&](const float prev_mask_value,
                               const float new_mask_value) {
    /*
     * Assume p := new_mask_value
     * H(p) := 1 + (p * log(p) + (1-p) * log(1-p)) / log(2)
     * uncertainty alpha(p) =
     *   Clamp(1 - (1 - H(p)) * (1 - H(p)), 0, 1) [squaring the uncertainty]
     *
     * The following polynomial approximates uncertainty alpha as a function
     * of (p + 0.5):
     */
    const float c1 = 5.68842;
    const float c2 = -0.748699;
    const float c3 = -57.8051;
    const float c4 = 291.309;
    const float c5 = -624.717;
    const float t = new_mask_value - 0.5f;
    const float x = t * t;

    const float uncertainty =
        1.0f -
        std::min(1.0f, x * (c1 + x * (c2 + x * (c3 + x * (c4 + x * c5)))));

    return new_mask_value + (prev_mask_value - new_mask_value) *
                                (uncertainty * combine_with_previous_ratio_);
  };

  // Write directly to the first channel of output.
  for (int i = 0; i < output_mat.rows; ++i) {
    float* out_ptr = output_mat.ptr<float>(i);
    const float* curr_ptr = current_mat.ptr<float>(i);
    const float* prev_ptr = previous_mat.ptr<float>(i);
    for (int j = 0; j < output_mat.cols; ++j) {
      const float new_mask_value = curr_ptr[j];
      const float prev_mask_value = prev_ptr[j];
      out_ptr[j] = blending_fn(prev_mask_value, new_mask_value);
    }
  }

  cc->Outputs()
      .Tag(kOutputMaskTag)
      .AddPacket(MakePacket<Image>(output_frame).At(cc->InputTimestamp()));

  return absl::OkStatus();
}

absl::Status SegmentationSmoothingCalculator::RenderGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // Setup source textures.
  const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
  RET_CHECK(
      (current_frame.format() == mediapipe::GpuBufferFormat::kBGRA32 ||
       current_frame.format() == mediapipe::GpuBufferFormat::kGrayHalf16 ||
       current_frame.format() == mediapipe::GpuBufferFormat::kGrayFloat32 ||
       current_frame.format() == mediapipe::GpuBufferFormat::kRGB24))
      << "Only RGBA, RGB, or 1-channel Float input image supported.";

  auto current_texture = gpu_helper_.CreateSourceTexture(current_frame);

  const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
  if (previous_frame.format() != current_frame.format()) {
    LOG(ERROR) << "Warning: mixing input format types. ";
  }
  auto previous_texture = gpu_helper_.CreateSourceTexture(previous_frame);

  // Setup destination texture.
  const int width = current_frame.width(), height = current_frame.height();
  auto output_texture = gpu_helper_.CreateeDestinationTexture(
      width, height, current_frame.format());

  // Process shader.
  {
    gpu_helper_.BindFramebuffer(output_texture);
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, current_texture.name());
    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, previous_texture.name());
    GlRender(cc);
    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
  }
  glFlush();

  // Send out image as GPU packet.
  auto output_frame = output_texture.GetFrame<Image>();
  cc->Outputs()
      .Tag(kOutputMaskTag)
      .Add(output_frame.release(), cc->InputTimestamp());
#endif  // !MEDIAPIPE_DISABLE_GPU

  return absl::OkStatus();
}
void SegmentationSmoothingCalculator::GlRender(CalculatorContext* cc) {
|
||||||
|
#if !MEDIAPIPE_DISABLE_GPU
|
||||||
|
static const GLfloat square_vertices[] = {
|
||||||
|
-1.0f, -1.0f, // bottom left
|
||||||
|
1.0f, -1.0f, // bottom right
|
||||||
|
-1.0f, 1.0f, // top left
|
||||||
|
1.0f, 1.0f, // top right
|
||||||
|
};
|
||||||
|
static const GLfloat texture_vertices[] = {
|
||||||
|
0.0f, 0.0f, // bottom left
|
||||||
|
1.0f, 0.0f, // bottom right
|
||||||
|
0.0f, 1.0f, // top left
|
||||||
|
1.0f, 1.0f, // top right
|
||||||
|
};
|
||||||
|
|
||||||
|
// program
|
||||||
|
glUseProgram(program_);
|
||||||
|
|
||||||
|
// vertex storage
|
||||||
|
GLuint vbo[2];
|
||||||
|
glGenBuffers(2, vbo);
|
||||||
|
GLuint vao;
|
||||||
|
glGenVertexArrays(1, &vao);
|
||||||
|
glBindVertexArray(vao);
|
||||||
|
|
||||||
|
// vbo 0
|
||||||
|
glBindBuffer(GL_ARRAY_BUFFER, vbo[0]);
|
||||||
|
glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat), square_vertices,
|
||||||
|
GL_STATIC_DRAW);
|
||||||
|
glEnableVertexAttribArray(ATTRIB_VERTEX);
|
||||||
|
glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0, nullptr);
|
||||||
|
|
||||||
|
// vbo 1
|
||||||
|
glBindBuffer(GL_ARRAY_BUFFER, vbo[1]);
|
||||||
|
glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat), texture_vertices,
|
||||||
|
GL_STATIC_DRAW);
|
||||||
|
glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
|
||||||
|
glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0, nullptr);
|
||||||
|
|
||||||
|
// draw
|
||||||
|
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||||
|
|
||||||
|
// cleanup
|
||||||
|
glDisableVertexAttribArray(ATTRIB_VERTEX);
|
||||||
|
glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
|
||||||
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||||
|
glBindVertexArray(0);
|
||||||
|
glDeleteVertexArrays(1, &vao);
|
||||||
|
glDeleteBuffers(2, vbo);
|
||||||
|
|
||||||
|
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||||
|
}
|
||||||
|
|
||||||
|
absl::Status SegmentationSmoothingCalculator::GlSetup(CalculatorContext* cc) {
|
||||||
|
#if !MEDIAPIPE_DISABLE_GPU
|
||||||
|
const GLint attr_location[NUM_ATTRIBUTES] = {
|
||||||
|
ATTRIB_VERTEX,
|
||||||
|
ATTRIB_TEXTURE_POSITION,
|
||||||
|
};
|
||||||
|
const GLchar* attr_name[NUM_ATTRIBUTES] = {
|
||||||
|
"position",
|
||||||
|
"texture_coordinate",
|
||||||
|
};
|
||||||
|
|
||||||
|
// Shader to blend in previous mask based on computed uncertainty probability.
|
||||||
|
const std::string frag_src =
|
||||||
|
absl::StrCat(std::string(mediapipe::kMediaPipeFragmentShaderPreamble),
|
||||||
|
R"(
|
||||||
|
DEFAULT_PRECISION(mediump, float)
|
||||||
|
|
||||||
|
#ifdef GL_ES
|
||||||
|
#define fragColor gl_FragColor
|
||||||
|
#else
|
||||||
|
out vec4 fragColor;
|
||||||
|
#endif // defined(GL_ES);
|
||||||
|
|
||||||
|
in vec2 sample_coordinate;
|
||||||
|
uniform sampler2D current_mask;
|
||||||
|
uniform sampler2D previous_mask;
|
||||||
|
uniform float combine_with_previous_ratio;
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
vec4 current_pix = texture2D(current_mask, sample_coordinate);
|
||||||
|
vec4 previous_pix = texture2D(previous_mask, sample_coordinate);
|
||||||
|
float new_mask_value = current_pix.r;
|
||||||
|
float prev_mask_value = previous_pix.r;
|
||||||
|
|
||||||
|
// Assume p := new_mask_value
|
||||||
|
// H(p) := 1 + (p * log(p) + (1-p) * log(1-p)) / log(2)
|
||||||
|
// uncertainty alpha(p) =
|
||||||
|
// Clamp(1 - (1 - H(p)) * (1 - H(p)), 0, 1) [squaring the uncertainty]
|
||||||
|
//
|
||||||
|
// The following polynomial approximates uncertainty alpha as a function
|
||||||
|
// of (p + 0.5):
|
||||||
|
const float c1 = 5.68842;
|
||||||
|
const float c2 = -0.748699;
|
||||||
|
const float c3 = -57.8051;
|
||||||
|
const float c4 = 291.309;
|
||||||
|
const float c5 = -624.717;
|
||||||
|
float t = new_mask_value - 0.5;
|
||||||
|
float x = t * t;
|
||||||
|
|
||||||
|
float uncertainty =
|
||||||
|
1.0 - min(1.0, x * (c1 + x * (c2 + x * (c3 + x * (c4 + x * c5)))));
|
||||||
|
|
||||||
|
new_mask_value +=
|
||||||
|
(prev_mask_value - new_mask_value) * (uncertainty * combine_with_previous_ratio);
|
||||||
|
|
||||||
|
fragColor = vec4(new_mask_value, 0.0, 0.0, new_mask_value);
|
||||||
|
}
|
||||||
|
)");
|
||||||
|
|
||||||
|
// Create shader program and set parameters.
|
||||||
|
mediapipe::GlhCreateProgram(mediapipe::kBasicVertexShader, frag_src.c_str(),
|
||||||
|
NUM_ATTRIBUTES, (const GLchar**)&attr_name[0],
|
||||||
|
attr_location, &program_);
|
||||||
|
RET_CHECK(program_) << "Problem initializing the program.";
|
||||||
|
glUseProgram(program_);
|
||||||
|
glUniform1i(glGetUniformLocation(program_, "current_mask"), 1);
|
||||||
|
glUniform1i(glGetUniformLocation(program_, "previous_mask"), 2);
|
||||||
|
glUniform1f(glGetUniformLocation(program_, "combine_with_previous_ratio"),
|
||||||
|
combine_with_previous_ratio_);
|
||||||
|
|
||||||
|
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||||
|
|
||||||
|
return absl::OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mediapipe
|
|
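The temporal smoothing above boils down to one formula, implemented twice (CPU lambda and fragment shader): move the current mask value toward the previous one by an amount proportional to the per-pixel uncertainty times combine_with_previous_ratio. A minimal standalone sketch of that formula, reusing the polynomial constants from the code (the free-function form and its name are ours, not part of the commit):

#include <algorithm>

// Sketch only: uncertainty-weighted blend of a previous and current mask value.
float SmoothMaskPixel(float prev_mask_value, float new_mask_value,
                      float combine_with_previous_ratio) {
  // Polynomial approximation of the squared-entropy uncertainty alpha(p);
  // it peaks near p = 0.5 and is close to zero for confident pixels.
  const float c1 = 5.68842f, c2 = -0.748699f, c3 = -57.8051f;
  const float c4 = 291.309f, c5 = -624.717f;
  const float t = new_mask_value - 0.5f;
  const float x = t * t;
  const float uncertainty =
      1.0f -
      std::min(1.0f, x * (c1 + x * (c2 + x * (c3 + x * (c4 + x * c5)))));
  // Uncertain pixels lean toward the previous mask; confident pixels keep
  // (most of) the current value.
  return new_mask_value + (prev_mask_value - new_mask_value) *
                              (uncertainty * combine_with_previous_ratio);
}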
@@ -0,0 +1,35 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message SegmentationSmoothingCalculatorOptions {
  extend CalculatorOptions {
    optional SegmentationSmoothingCalculatorOptions ext = 377425128;
  }

  // How much to blend in previous mask, based on a probability estimate.
  // Range: [0-1]
  // 0 = Use only current frame (no blending).
  // 1 = Blend in the previous mask based on uncertainty estimate.
  // With ratio at 1, the uncertainty estimate is trusted completely.
  // When uncertainty is high, the previous mask is given higher weight.
  // Therefore, if both ratio and uncertainty are 1, only old mask is used.
  // A pixel is 'uncertain' if its value is close to the middle (0.5 or 127).
  optional float combine_with_previous_ratio = 1 [default = 0.0];
}
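For reference, a node consuming this option is configured through node_options, as in the test file that follows. A minimal CPU-only sketch (stream names and the 0.7 ratio are arbitrary examples, not taken from the commit):

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Sketch only: single-node graph wiring SegmentationSmoothingCalculator with
// combine_with_previous_ratio set, mirroring the test configuration below.
mediapipe::CalculatorGraphConfig MakeSmoothingGraphConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "curr_mask"
    input_stream: "prev_mask"
    output_stream: "new_mask"
    node {
      calculator: "SegmentationSmoothingCalculator"
      input_stream: "MASK:curr_mask"
      input_stream: "MASK_PREVIOUS:prev_mask"
      output_stream: "MASK_SMOOTHED:new_mask"
      node_options {
        [type.googleapis.com/mediapipe.SegmentationSmoothingCalculatorOptions] {
          combine_with_previous_ratio: 0.7
        }
      }
    }
  )pb");
}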
@@ -0,0 +1,206 @@
// Copyright 2018 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>

#include "mediapipe/calculators/image/segmentation_smoothing_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {

namespace {

// 4x4 VEC32F1, center 2x2 block set at ~250
const float mask_data[] = {
    0.00, 0.00, 0.00, 0.00,  //
    0.00, 0.98, 0.98, 0.00,  //
    0.00, 0.98, 0.98, 0.00,  //
    0.00, 0.00, 0.00, 0.00,  //
};

void RunGraph(Packet curr_packet, Packet prev_packet, bool use_gpu, float ratio,
              cv::Mat* result) {
  CalculatorGraphConfig graph_config;
  if (use_gpu) {
    graph_config = ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
        R"pb(
          input_stream: "curr_mask"
          input_stream: "prev_mask"
          output_stream: "new_mask"
          node {
            calculator: "ImageCloneCalculator"
            input_stream: "curr_mask"
            output_stream: "curr_mask_gpu"
            options: {
              [mediapipe.ImageCloneCalculatorOptions.ext] {
                output_on_gpu: true
              }
            }
          }
          node {
            calculator: "ImageCloneCalculator"
            input_stream: "prev_mask"
            output_stream: "prev_mask_gpu"
            options: {
              [mediapipe.ImageCloneCalculatorOptions.ext] {
                output_on_gpu: true
              }
            }
          }
          node {
            calculator: "SegmentationSmoothingCalculator"
            input_stream: "MASK:curr_mask_gpu"
            input_stream: "MASK_PREVIOUS:prev_mask_gpu"
            output_stream: "MASK_SMOOTHED:new_mask"
            node_options {
              [type.googleapis.com/
               mediapipe.SegmentationSmoothingCalculatorOptions]: {
                combine_with_previous_ratio: $0
              }
            }
          }
        )pb",
        ratio));
  } else {
    graph_config = ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
        R"pb(
          input_stream: "curr_mask"
          input_stream: "prev_mask"
          output_stream: "new_mask"
          node {
            calculator: "SegmentationSmoothingCalculator"
            input_stream: "MASK:curr_mask"
            input_stream: "MASK_PREVIOUS:prev_mask"
            output_stream: "MASK_SMOOTHED:new_mask"
            node_options {
              [type.googleapis.com/
               mediapipe.SegmentationSmoothingCalculatorOptions]: {
                combine_with_previous_ratio: $0
              }
            }
          }
        )pb",
        ratio));
  }
  std::vector<Packet> output_packets;
  tool::AddVectorSink("new_mask", &graph_config, &output_packets);
  CalculatorGraph graph(graph_config);
  MP_ASSERT_OK(graph.StartRun({}));

  MP_ASSERT_OK(
      graph.AddPacketToInputStream("curr_mask", curr_packet.At(Timestamp(0))));
  MP_ASSERT_OK(
      graph.AddPacketToInputStream("prev_mask", prev_packet.At(Timestamp(0))));
  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_EQ(1, output_packets.size());

  Image result_image = output_packets[0].Get<Image>();
  cv::Mat result_mat = formats::MatView(&result_image);
  result_mat.copyTo(*result);

  // Fully close graph at end, otherwise calculator+Images are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("curr_mask"));
  MP_ASSERT_OK(graph.CloseInputStream("prev_mask"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

void RunTest(bool use_gpu, float mix_ratio, cv::Mat& test_result) {
  cv::Mat mask_mat(cv::Size(4, 4), CV_32FC1, const_cast<float*>(mask_data));
  cv::Mat curr_mat = mask_mat;
  // 3x3 blur of 250 block produces all pixels '111'.
  cv::Mat prev_mat;
  cv::blur(mask_mat, prev_mat, cv::Size(3, 3));

  Packet curr_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
      ImageFormat::VEC32F1, curr_mat.size().width, curr_mat.size().height));
  curr_mat.copyTo(formats::MatView(&(curr_packet.Get<Image>())));
  Packet prev_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
      ImageFormat::VEC32F1, prev_mat.size().width, prev_mat.size().height));
  prev_mat.copyTo(formats::MatView(&(prev_packet.Get<Image>())));

  cv::Mat result;
  RunGraph(curr_packet, prev_packet, use_gpu, mix_ratio, &result);

  ASSERT_EQ(curr_mat.rows, result.rows);
  ASSERT_EQ(curr_mat.cols, result.cols);
  ASSERT_EQ(curr_mat.type(), result.type());
  result.copyTo(test_result);

  if (mix_ratio == 1.0) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        float in = curr_mat.at<float>(i, j);
        float out = result.at<float>(i, j);
        // Since the input has high value (250), it has low uncertainty.
        // So the output should have changed lower (towards prev),
        // but not too much.
        if (in > 0) EXPECT_NE(in, out);
        EXPECT_NEAR(in, out, 3.0 / 255.0);
      }
    }
  } else if (mix_ratio == 0.0) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        float in = curr_mat.at<float>(i, j);
        float out = result.at<float>(i, j);
        EXPECT_EQ(in, out);  // Output should match current.
      }
    }
  } else {
    LOG(ERROR) << "invalid ratio";
  }
}

TEST(SegmentationSmoothingCalculatorTest, TestSmoothing) {
  bool use_gpu;
  float mix_ratio;

  use_gpu = false;
  mix_ratio = 0.0;
  cv::Mat cpu_0;
  RunTest(use_gpu, mix_ratio, cpu_0);

  use_gpu = false;
  mix_ratio = 1.0;
  cv::Mat cpu_1;
  RunTest(use_gpu, mix_ratio, cpu_1);

  use_gpu = true;
  mix_ratio = 1.0;
  cv::Mat gpu_1;
  RunTest(use_gpu, mix_ratio, gpu_1);

  // CPU & GPU should match.
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j) {
      float gpu = gpu_1.at<float>(i, j);
      float cpu = cpu_1.at<float>(i, j);
      EXPECT_EQ(cpu, gpu);
    }
  }
}

}  // namespace
}  // namespace mediapipe
@@ -109,6 +109,8 @@ cc_library(
         "//mediapipe/gpu:MPPMetalUtil",
         "//mediapipe/gpu:gpu_buffer",
         "//mediapipe/objc:mediapipe_framework_ios",
+        "//mediapipe/util/tflite:config",
+        "@com_google_absl//absl/memory",
         "@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate",
         "@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate_internal",
         "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
@@ -478,7 +480,6 @@ cc_library(
     deps = [
         ":image_to_tensor_calculator_cc_proto",
         ":image_to_tensor_converter",
-        ":image_to_tensor_converter_opencv",
         ":image_to_tensor_utils",
         "//mediapipe/framework/api2:node",
         "//mediapipe/framework/formats:image",
@@ -494,6 +495,9 @@ cc_library(
     ] + select({
         "//mediapipe/gpu:disable_gpu": [],
         "//conditions:default": [":image_to_tensor_calculator_gpu_deps"],
+    }) + select({
+        "//mediapipe/framework/port:disable_opencv": [],
+        "//conditions:default": [":image_to_tensor_converter_opencv"],
     }),
     alwayslink = 1,
 )
@@ -18,7 +18,6 @@
 #include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
 #include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
-#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
 #include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
 #include "mediapipe/framework/api2/node.h"
 #include "mediapipe/framework/calculator_framework.h"
@@ -33,6 +32,10 @@
 #include "mediapipe/framework/port/statusor.h"
 #include "mediapipe/gpu/gpu_origin.pb.h"

+#if !MEDIAPIPE_DISABLE_OPENCV
+#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
+#endif
+
 #if !MEDIAPIPE_DISABLE_GPU
 #include "mediapipe/gpu/gpu_buffer.h"
@@ -301,8 +304,13 @@ class ImageToTensorCalculator : public Node {
       }
     } else {
       if (!cpu_converter_) {
+#if !MEDIAPIPE_DISABLE_OPENCV
         ASSIGN_OR_RETURN(cpu_converter_,
                          CreateOpenCvConverter(cc, GetBorderMode()));
+#else
+        LOG(FATAL) << "Cannot create image to tensor opencv converter since "
+                      "MEDIAPIPE_DISABLE_OPENCV is defined.";
+#endif  // !MEDIAPIPE_DISABLE_OPENCV
       }
     }
     return absl::OkStatus();
@@ -312,7 +312,7 @@ class GlProcessor : public ImageToTensorConverter {
       return absl::OkStatus();
     }));

-    return std::move(tensor);
+    return tensor;
   }

   ~GlProcessor() override {
@@ -338,8 +338,7 @@ CreateImageToGlBufferTensorConverter(CalculatorContext* cc,
   auto result = absl::make_unique<GlProcessor>();
   MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom, border_mode));

-  // Simply "return std::move(result)" failed to build on macOS with bazel.
-  return std::unique_ptr<ImageToTensorConverter>(std::move(result));
+  return result;
 }

 }  // namespace mediapipe
@@ -334,9 +334,7 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
                                       BorderMode border_mode) {
   auto result = absl::make_unique<GlProcessor>();
   MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom, border_mode));
-
-  // Simply "return std::move(result)" failed to build on macOS with bazel.
-  return std::unique_ptr<ImageToTensorConverter>(std::move(result));
+  return result;
 }

 }  // namespace mediapipe
@@ -383,7 +383,7 @@ class MetalProcessor : public ImageToTensorConverter {
           tflite::gpu::HW(output_dims.height, output_dims.width),
           command_buffer, buffer_view.buffer()));
       [command_buffer commit];
-      return std::move(tensor);
+      return tensor;
     }
   }
@@ -399,8 +399,7 @@ absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateMetalConverter(
   auto result = absl::make_unique<MetalProcessor>();
   MP_RETURN_IF_ERROR(result->Init(cc, border_mode));

-  // Simply "return std::move(result)" failed to build on macOS with bazel.
-  return std::unique_ptr<ImageToTensorConverter>(std::move(result));
+  return result;
 }

 }  // namespace mediapipe
@@ -103,7 +103,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
         GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
                                     range_min, range_max));
     transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset);
-    return std::move(tensor);
+    return tensor;
   }

  private:
@@ -114,10 +114,7 @@ class OpenCvProcessor : public ImageToTensorConverter {

 absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
     CalculatorContext* cc, BorderMode border_mode) {
-  // Simply "return absl::make_unique<OpenCvProcessor>()" failed to build on
-  // macOS with bazel.
-  return std::unique_ptr<ImageToTensorConverter>(
-      absl::make_unique<OpenCvProcessor>(border_mode));
+  return absl::make_unique<OpenCvProcessor>(border_mode);
 }

 }  // namespace mediapipe
@@ -4,7 +4,7 @@ output_stream: "detections"

 # Subgraph that detects faces.
 node {
-  calculator: "FaceDetectionFrontCpu"
+  calculator: "FaceDetectionShortRangeCpu"
   input_stream: "IMAGE:image"
   output_stream: "DETECTIONS:detections"
 }
@@ -490,7 +490,7 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
             << keyed_tensors.first;
       }
     } else {
-      // Pad by replicating the first tens or, then ignore the values.
+      // Pad by replicating the first tensor, then ignore the values.
       keyed_tensors.second.resize(options_.batch_size());
       std::fill(keyed_tensors.second.begin() +
                     inference_state->batch_timestamps_.size(),
@@ -840,6 +840,20 @@ cc_test(
     ],
 )

+cc_library(
+    name = "world_landmark_projection_calculator",
+    srcs = ["world_landmark_projection_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
+        "//mediapipe/framework/port:ret_check",
+        "//mediapipe/framework/port:status",
+    ],
+    alwayslink = 1,
+)
+
 mediapipe_proto_library(
     name = "landmarks_smoothing_calculator_proto",
     srcs = ["landmarks_smoothing_calculator.proto"],
@@ -894,6 +908,31 @@ cc_library(
     alwayslink = 1,
 )

+mediapipe_proto_library(
+    name = "visibility_copy_calculator_proto",
+    srcs = ["visibility_copy_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+    ],
+)
+
+cc_library(
+    name = "visibility_copy_calculator",
+    srcs = ["visibility_copy_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":visibility_copy_calculator_cc_proto",
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:timestamp",
+        "//mediapipe/framework/formats:landmark_cc_proto",
+        "//mediapipe/framework/port:ret_check",
+        "@com_google_absl//absl/algorithm:container",
+    ],
+    alwayslink = 1,
+)
+
 cc_library(
     name = "landmarks_to_floats_calculator",
     srcs = ["landmarks_to_floats_calculator.cc"],
@@ -272,6 +272,15 @@ absl::Status AnnotationOverlayCalculator::Open(CalculatorContext* cc) {
 }

 absl::Status AnnotationOverlayCalculator::Process(CalculatorContext* cc) {
+  if (cc->Inputs().HasTag(kGpuBufferTag) &&
+      cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
+    return absl::OkStatus();
+  }
+  if (cc->Inputs().HasTag(kImageFrameTag) &&
+      cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
+    return absl::OkStatus();
+  }
+
   // Initialize render target, drawn with OpenCV.
   std::unique_ptr<cv::Mat> image_mat;
   ImageFormat::Format target_format;
@@ -203,6 +203,9 @@ absl::Status DetectionsToRectsCalculator::Process(CalculatorContext* cc) {
       cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
     return absl::OkStatus();
   }
+  if (rotate_ && !HasTagValue(cc, kImageSizeTag)) {
+    return absl::OkStatus();
+  }

   std::vector<Detection> detections;
   if (cc->Inputs().HasTag(kDetectionTag)) {
@@ -130,8 +130,8 @@ absl::Status RectTransformationCalculator::Process(CalculatorContext* cc) {
     }
     cc->Outputs().Index(0).Add(output_rects.release(), cc->InputTimestamp());
   }
-  if (cc->Inputs().HasTag(kNormRectTag) &&
-      !cc->Inputs().Tag(kNormRectTag).IsEmpty()) {
+  if (HasTagValue(cc->Inputs(), kNormRectTag) &&
+      HasTagValue(cc->Inputs(), kImageSizeTag)) {
     auto rect = cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>();
     const auto& image_size =
         cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
@@ -139,8 +139,8 @@ absl::Status RectTransformationCalculator::Process(CalculatorContext* cc) {
     cc->Outputs().Index(0).AddPacket(
         MakePacket<NormalizedRect>(rect).At(cc->InputTimestamp()));
   }
-  if (cc->Inputs().HasTag(kNormRectsTag) &&
-      !cc->Inputs().Tag(kNormRectsTag).IsEmpty()) {
+  if (HasTagValue(cc->Inputs(), kNormRectsTag) &&
+      HasTagValue(cc->Inputs(), kImageSizeTag)) {
     auto rects =
         cc->Inputs().Tag(kNormRectsTag).Get<std::vector<NormalizedRect>>();
     const auto& image_size =
@@ -549,7 +549,7 @@ absl::Status MotionAnalysisCalculator::Process(CalculatorContext* cc) {
     timestamp_buffer_.push_back(timestamp);
     ++frame_idx_;

-    VLOG_EVERY_N(0, 100) << "Analyzed frame " << frame_idx_;
+    VLOG_EVERY_N(1, 100) << "Analyzed frame " << frame_idx_;

     // Buffer input frames only if visualization is requested.
     if (visualize_output_ || video_output_) {
@@ -37,7 +37,7 @@ android_binary(
     srcs = glob(["*.java"]),
     assets = [
         "//mediapipe/graphs/face_detection:face_detection_mobile_cpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
     ],
     assets_dir = "",
     manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@@ -0,0 +1,60 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

package(default_visibility = ["//visibility:private"])

cc_binary(
    name = "libmediapipe_jni.so",
    linkshared = 1,
    linkstatic = 1,
    deps = [
        "//mediapipe/graphs/face_detection:face_detection_full_range_mobile_gpu_deps",
        "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
    ],
)

cc_library(
    name = "mediapipe_jni_lib",
    srcs = [":libmediapipe_jni.so"],
    alwayslink = 1,
)

android_binary(
    name = "facedetectionfullrangegpu",
    srcs = glob(["*.java"]),
    assets = [
        "//mediapipe/graphs/face_detection:face_detection_full_range_mobile_gpu.binarypb",
        "//mediapipe/modules/face_detection:face_detection_full_range_sparse.tflite",
    ],
    assets_dir = "",
    manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
    manifest_values = {
        "applicationId": "com.google.mediapipe.apps.facedetectionfullrangegpu",
        "appName": "Face Detection Full-range (GPU)",
        "mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
        "cameraFacingFront": "False",
        "binaryGraphName": "face_detection_full_range_mobile_gpu.binarypb",
        "inputVideoStreamName": "input_video",
        "outputVideoStreamName": "output_video",
        "flipFramesVertically": "True",
        "converterNumBuffers": "2",
    },
    multidex = "native",
    deps = [
        ":mediapipe_jni_lib",
        "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
    ],
)
@@ -37,7 +37,7 @@ android_binary(
     srcs = glob(["*.java"]),
     assets = [
         "//mediapipe/graphs/face_detection:face_detection_mobile_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
     ],
     assets_dir = "",
     manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@@ -42,7 +42,7 @@ android_binary(
         "//mediapipe/graphs/face_effect/data:glasses.binarypb",
         "//mediapipe/graphs/face_effect/data:glasses.pngblob",
         "//mediapipe/graphs/face_effect:face_effect_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_detection.binarypb",
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
@@ -38,7 +38,7 @@ android_binary(
     assets = [
         "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
     ],
     assets_dir = "",
     manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@@ -37,7 +37,7 @@ android_binary(
     srcs = glob(["*.java"]),
     assets = [
         "//mediapipe/graphs/holistic_tracking:holistic_tracking_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
         "//mediapipe/modules/hand_landmark:hand_landmark.tflite",
         "//mediapipe/modules/hand_landmark:handedness.txt",
@@ -39,7 +39,7 @@ android_binary(
         "//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
         "//mediapipe/modules/iris_landmark:iris_landmark.tflite",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
     ],
     assets_dir = "",
     manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@@ -38,7 +38,7 @@ node {
   output_stream: "TENSORS:detection_tensors"
   options: {
     [mediapipe.TfLiteInferenceCalculatorOptions.ext] {
-      model_path: "mediapipe/modules/face_detection/face_detection_back.tflite"
+      model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
     }
   }
 }
@@ -16,6 +16,14 @@ licenses(["notice"])

 package(default_visibility = ["//mediapipe/examples:__subpackages__"])

+cc_binary(
+    name = "face_detection_full_range_cpu",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main",
+        "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_deps",
+    ],
+)
+
 cc_binary(
     name = "face_detection_cpu",
     deps = [
@@ -55,7 +55,7 @@ objc_library(
     name = "FaceDetectionCpuAppLibrary",
     data = [
         "//mediapipe/graphs/face_detection:face_detection_mobile_cpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
     ],
     deps = [
         "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",
@@ -55,7 +55,7 @@ objc_library(
     name = "FaceDetectionGpuAppLibrary",
     data = [
         "//mediapipe/graphs/face_detection:face_detection_mobile_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
     ],
     deps = [
         "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",
@@ -66,7 +66,7 @@ objc_library(
         "//mediapipe/graphs/face_effect/data:facepaint.pngblob",
         "//mediapipe/graphs/face_effect/data:glasses.binarypb",
         "//mediapipe/graphs/face_effect/data:glasses.pngblob",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata.binarypb",
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_detection.binarypb",
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
@@ -109,7 +109,7 @@ objc_library(
         "//mediapipe/graphs/face_effect/data:facepaint.pngblob",
         "//mediapipe/graphs/face_effect/data:glasses.binarypb",
         "//mediapipe/graphs/face_effect/data:glasses.pngblob",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata.binarypb",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
     ],
@@ -62,7 +62,7 @@ objc_library(
     copts = ["-std=c++17"],
     data = [
         "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
     ],
     deps = [
@@ -55,7 +55,7 @@ objc_library(
     name = "HolisticTrackingGpuAppLibrary",
     data = [
         "//mediapipe/graphs/holistic_tracking:holistic_tracking_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
         "//mediapipe/modules/hand_landmark:hand_landmark.tflite",
         "//mediapipe/modules/hand_landmark:handedness.txt",
@@ -62,7 +62,7 @@ objc_library(
     copts = ["-std=c++17"],
     data = [
         "//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb",
-        "//mediapipe/modules/face_detection:face_detection_front.tflite",
+        "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
         "//mediapipe/modules/iris_landmark:iris_landmark.tflite",
     ],
@@ -953,6 +953,9 @@ cc_library(
     }) + select({
         "//conditions:default": [],
         "//mediapipe/gpu:disable_gpu": ["MEDIAPIPE_DISABLE_GPU=1"],
+    }) + select({
+        "//conditions:default": [],
+        "//mediapipe/framework/port:disable_opencv": ["MEDIAPIPE_DISABLE_OPENCV=1"],
     }) + select({
         "//conditions:default": [],
         "//mediapipe/framework:disable_rtti_and_exceptions": [
@@ -17,6 +17,14 @@ namespace mediapipe {
 namespace api2 {
 namespace builder {

+// Workaround for static_assert(false). Example:
+//   dependent_false<T>::value returns false.
+// For more information, see:
+//   https://en.cppreference.com/w/cpp/language/if#Constexpr_If
+// TODO: migrate to a common utility when available.
+template <class T>
+struct dependent_false : std::false_type {};
+
 template <typename T>
 T& GetWithAutoGrow(std::vector<std::unique_ptr<T>>* vecp, int index) {
   auto& vec = *vecp;
@@ -209,6 +217,21 @@ class NodeBase {
     return SideDestination<true>(&in_sides_[tag]);
   }

+  template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
+  auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
+    if constexpr (std::is_same_v<B, OutputBase>) {
+      return Source<kIsMultiple, T>(&out_streams_[port.Tag()]);
+    } else if constexpr (std::is_same_v<B, InputBase>) {
+      return Destination<kIsMultiple, T>(&in_streams_[port.Tag()]);
+    } else if constexpr (std::is_same_v<B, SideOutputBase>) {
+      return SideSource<kIsMultiple, T>(&out_sides_[port.Tag()]);
+    } else if constexpr (std::is_same_v<B, SideInputBase>) {
+      return SideDestination<kIsMultiple, T>(&in_sides_[port.Tag()]);
+    } else {
+      static_assert(dependent_false<B>::value, "Type not supported.");
+    }
+  }
+
   // Convenience methods for accessing purely index-based ports.
   Source<false> Out(int index) { return Out("")[index]; }
@@ -429,6 +452,24 @@ class Graph {
     return Dst(&graph_boundary_.in_sides_[graph_output.Tag()]);
   }

+  template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
+  auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
+    if constexpr (std::is_same_v<B, OutputBase>) {
+      return Destination<kIsMultiple, T>(
+          &graph_boundary_.in_streams_[port.Tag()]);
+    } else if constexpr (std::is_same_v<B, InputBase>) {
+      return Source<kIsMultiple, T>(&graph_boundary_.out_streams_[port.Tag()]);
+    } else if constexpr (std::is_same_v<B, SideOutputBase>) {
+      return SideDestination<kIsMultiple, T>(
+          &graph_boundary_.in_sides_[port.Tag()]);
+    } else if constexpr (std::is_same_v<B, SideInputBase>) {
+      return SideSource<kIsMultiple, T>(
+          &graph_boundary_.out_sides_[port.Tag()]);
+    } else {
+      static_assert(dependent_false<B>::value, "Type not supported.");
+    }
+  }
+
   // Returns the graph config. This can be used to instantiate and run the
   // graph.
   CalculatorGraphConfig GetConfig() {
@@ -138,6 +138,35 @@ TEST(BuilderTest, TypedMultiple) {
   EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
 }

+TEST(BuilderTest, TypedByPorts) {
+  builder::Graph graph;
+  auto& foo = graph.AddNode<Foo>();
+  auto& adder = graph.AddNode<FloatAdder>();
+
+  graph[FooBar1::kIn].SetName("base") >> foo[Foo::kBase];
+  foo[Foo::kOut] >> adder[FloatAdder::kIn][0];
+  foo[Foo::kOut] >> adder[FloatAdder::kIn][1];
+  adder[FloatAdder::kOut].SetName("out") >> graph[FooBar1::kOut];
+
+  CalculatorGraphConfig expected =
+      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
+        input_stream: "IN:base"
+        output_stream: "OUT:out"
+        node {
+          calculator: "Foo"
+          input_stream: "BASE:base"
+          output_stream: "OUT:__stream_0"
+        }
+        node {
+          calculator: "FloatAdder"
+          input_stream: "IN:0:__stream_0"
+          input_stream: "IN:1:__stream_0"
+          output_stream: "OUT:out"
+        }
+      )pb");
+  EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
+}
+
 TEST(BuilderTest, PacketGenerator) {
   builder::Graph graph;
   auto& generator = graph.AddPacketGenerator("FloatGenerator");
@@ -186,6 +186,7 @@ cc_library(
         "//conditions:default": [
             "//mediapipe/framework/port:opencv_imgproc",
         ],
+        "//mediapipe/framework/port:disable_opencv": [],
     }) + select({
         "//conditions:default": [
         ],
@@ -76,10 +76,7 @@ bool Image::ConvertToGpu() const {
   gpu_buffer_ = mediapipe::GpuBuffer(std::move(buffer));
 #else
   // GlCalculatorHelperImpl::MakeGlTextureBuffer (CreateSourceTexture)
-  auto buffer = mediapipe::GlTextureBuffer::Create(
-      image_frame_->Width(), image_frame_->Height(),
-      mediapipe::GpuBufferFormatForImageFormat(image_frame_->Format()),
-      image_frame_->PixelData());
+  auto buffer = mediapipe::GlTextureBuffer::Create(*image_frame_);
   glBindTexture(GL_TEXTURE_2D, buffer->name());
   // See GlCalculatorHelperImpl::SetStandardTextureParams
   glTexParameteri(buffer->target(), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -32,7 +32,12 @@

 // clang-format off
 #if !defined(LOCATION_OPENCV)
-# define LOCATION_OPENCV 1
+# if !MEDIAPIPE_DISABLE_OPENCV && \
+     (!defined(MEDIAPIPE_MOBILE) || defined(MEDIAPIPE_ANDROID_OPENCV))
+#   define LOCATION_OPENCV 1
+# else
+#   define LOCATION_OPENCV 0
+# endif
 #endif

 #if LOCATION_OPENCV
@@ -158,12 +158,12 @@ cc_library(
     hdrs = [
         "gmock.h",
         "gtest.h",
+        "gtest-spi.h",
         "status_matchers.h",
     ],
     visibility = ["//visibility:public"],
     deps = [
         ":status_matchers",
-        "//mediapipe/framework:port",
         "@com_google_googletest//:gtest",
     ],
 )
@@ -174,12 +174,12 @@ cc_library(
     hdrs = [
         "gmock.h",
         "gtest.h",
+        "gtest-spi.h",
        "status_matchers.h",
     ],
     visibility = ["//visibility:public"],
     deps = [
         ":status_matchers",
-        "//mediapipe/framework:port",
         "//mediapipe/framework/deps:status_matchers",
         "@com_google_googletest//:gtest_main",
     ],
@@ -217,6 +217,16 @@ cc_library(
     deps = ["//mediapipe/framework/deps:numbers"],
 )

+# Disabling opencv when defining MEDIAPIPE_DISABLE_OPENCV to 1 in the bazel command.
+# Note that this only applies to a select few calculators/framework components currently.
+config_setting(
+    name = "disable_opencv",
+    define_values = {
+        "MEDIAPIPE_DISABLE_OPENCV": "1",
+    },
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "opencv_core",
     hdrs = ["opencv_core_inc.h"],
mediapipe/framework/port/gtest-spi.h (new file, 20 lines)
@@ -0,0 +1,20 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_PORT_GTEST_SPI_H_
#define MEDIAPIPE_PORT_GTEST_SPI_H_

#include "gtest/gtest-spi.h"

#endif  // MEDIAPIPE_PORT_GTEST_SPI_H_
@@ -585,6 +585,7 @@ cc_library(
         "//mediapipe:apple": [
             ":gl_calculator_helper_ios",
             "//mediapipe/objc:util",
+            "//mediapipe/objc:CFHolder",
         ],
     }),
 )
@@ -714,11 +715,12 @@ cc_library(
     deps = [
         ":gl_calculator_helper",
         "//mediapipe/framework:calculator_framework",
-        "//mediapipe/framework:timestamp",
         "//mediapipe/framework/formats:image_frame",
-        "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
-    ],
+    ] + select({
+        "//conditions:default": [],
+        "//mediapipe:apple": ["//mediapipe/objc:util"],
+    }),
     alwayslink = 1,
 )
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include <memory>
+
+#include "mediapipe/framework/formats/image_frame.h"
 #include "mediapipe/gpu/gl_calculator_helper_impl.h"
 #include "mediapipe/gpu/gpu_buffer_format.h"
 #include "mediapipe/gpu/gpu_shared_data_internal.h"
@@ -176,10 +179,8 @@ GlTexture GlCalculatorHelperImpl::MapGlTextureBuffer(
 GlTextureBufferSharedPtr GlCalculatorHelperImpl::MakeGlTextureBuffer(
     const ImageFrame& image_frame) {
   CHECK(gl_context_->IsCurrent());
-  auto buffer = GlTextureBuffer::Create(
-      image_frame.Width(), image_frame.Height(),
-      GpuBufferFormatForImageFormat(image_frame.Format()),
-      image_frame.PixelData());
+  auto buffer = GlTextureBuffer::Create(image_frame);

   if (buffer->format_ != GpuBufferFormat::kUnknown) {
     glBindTexture(GL_TEXTURE_2D, buffer->name_);
@@ -32,15 +32,56 @@ std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Wrap(

std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Create(int width, int height,
                                                         GpuBufferFormat format,
-                                                        const void* data) {
+                                                        const void* data,
+                                                        int alignment) {
  auto buf = absl::make_unique<GlTextureBuffer>(GL_TEXTURE_2D, 0, width, height,
                                                format, nullptr);
-  if (!buf->CreateInternal(data)) {
+  if (!buf->CreateInternal(data, alignment)) {
    return nullptr;
  }
  return buf;
}

+static inline int AlignedToPowerOf2(int value, int alignment) {
+  // alignment must be a power of 2
+  return ((value - 1) | (alignment - 1)) + 1;
+}
+
+std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Create(
+    const ImageFrame& image_frame) {
+  int base_ws = image_frame.Width() * image_frame.NumberOfChannels() *
+                image_frame.ByteDepth();
+  int actual_ws = image_frame.WidthStep();
+  int alignment = 0;
+  std::unique_ptr<ImageFrame> temp;
+  const uint8* data = image_frame.PixelData();
+
+  // Let's see if the pixel data is tightly aligned to one of the alignments
+  // supported by OpenGL, preferring 4 if possible since it's the default.
+  if (actual_ws == AlignedToPowerOf2(base_ws, 4))
+    alignment = 4;
+  else if (actual_ws == AlignedToPowerOf2(base_ws, 1))
+    alignment = 1;
+  else if (actual_ws == AlignedToPowerOf2(base_ws, 2))
+    alignment = 2;
+  else if (actual_ws == AlignedToPowerOf2(base_ws, 8))
+    alignment = 8;
+
+  // If no GL-compatible alignment was found, we copy the data to a temporary
+  // buffer, aligned to 4. We do this using another ImageFrame purely for
+  // convenience.
+  if (!alignment) {
+    temp = std::make_unique<ImageFrame>();
+    temp->CopyFrom(image_frame, 4);
+    data = temp->PixelData();
+    alignment = 4;
+  }
+
+  return Create(image_frame.Width(), image_frame.Height(),
+                GpuBufferFormatForImageFormat(image_frame.Format()), data,
+                alignment);
+}
+
GlTextureBuffer::GlTextureBuffer(GLenum target, GLuint name, int width,
                                 int height, GpuBufferFormat format,
                                 DeletionCallback deletion_callback,
@@ -53,7 +94,7 @@ GlTextureBuffer::GlTextureBuffer(GLenum target, GLuint name, int width,
      deletion_callback_(deletion_callback),
      producer_context_(producer_context) {}

-bool GlTextureBuffer::CreateInternal(const void* data) {
+bool GlTextureBuffer::CreateInternal(const void* data, int alignment) {
  auto context = GlContext::GetCurrent();
  if (!context) return false;
@@ -66,8 +107,11 @@ bool GlTextureBuffer::CreateInternal(const void* data) {
  GlTextureInfo info =
      GlTextureInfoForGpuBufferFormat(format_, 0, context->GetGlVersion());

+  if (alignment != 4 && data) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
+
  // See b/70294573 for details about this.
  if (info.gl_internal_format == GL_RGBA16F &&
+      context->GetGlVersion() != GlVersion::kGLES2 &&
      SymbolAvailable(&glTexStorage2D)) {
    CHECK(data == nullptr) << "unimplemented";
    glTexStorage2D(target_, 1, info.gl_internal_format, width_, height_);
@@ -76,6 +120,8 @@ bool GlTextureBuffer::CreateInternal(const void* data) {
                 height_, 0 /* border */, info.gl_format, info.gl_type, data);
  }

+  if (alignment != 4 && data) glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
+
  glBindTexture(target_, 0);

  // Use the deletion callback to delete the texture on the context
@@ -21,6 +21,7 @@
#include <atomic>

#include "absl/memory/memory.h"
+#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gl_context.h"
#include "mediapipe/gpu/gpu_buffer_format.h"
@@ -60,7 +61,11 @@ class GlTextureBuffer {
  // provided later via glTexSubImage2D.
  static std::unique_ptr<GlTextureBuffer> Create(int width, int height,
                                                 GpuBufferFormat format,
-                                                const void* data = nullptr);
+                                                const void* data = nullptr,
+                                                int alignment = 4);
+
+  // Create a texture with a copy of the data in image_frame.
+  static std::unique_ptr<GlTextureBuffer> Create(const ImageFrame& image_frame);

  // Wraps an existing texture, but does not take ownership of it.
  // deletion_callback is invoked when the GlTextureBuffer is released, so
@@ -127,7 +132,7 @@ class GlTextureBuffer {
  // If data is provided, it is uploaded to the texture; otherwise, it can be
  // provided later via glTexSubImage2D.
  // Returns true on success.
-  bool CreateInternal(const void* data = nullptr);
+  bool CreateInternal(const void* data, int alignment = 4);

  friend class GlCalculatorHelperImpl;
@@ -51,8 +51,6 @@ namespace mediapipe {
constexpr int kMaxShaderInfoLength = 1024;

GLint GlhCompileShader(GLenum target, const GLchar* source, GLuint* shader) {
-  GLint status;
-
  *shader = glCreateShader(target);
  if (*shader == 0) {
    return GL_FALSE;
@@ -62,6 +60,11 @@ GLint GlhCompileShader(GLenum target, const GLchar* source, GLuint* shader) {

  GL_DEBUG_LOG(Shader, *shader, "compile");

+#if UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
+  return GL_TRUE;
+#else
+  GLint status;
+
  glGetShaderiv(*shader, GL_COMPILE_STATUS, &status);
  LOG_IF(ERROR, status == GL_FALSE) << "Failed to compile shader:\n" << source;
@@ -72,19 +75,24 @@ GLint GlhCompileShader(GLenum target, const GLchar* source, GLuint* shader) {
    LOG(ERROR) << "Error message: " << std::string(cmessage, length);
  }
  return status;
+#endif  // UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
}

GLint GlhLinkProgram(GLuint program) {
-  GLint status;
-
  glLinkProgram(program);

+#if UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
+  return GL_TRUE;
+#else
+  GLint status;
+
  GL_DEBUG_LOG(Program, program, "link");

  glGetProgramiv(program, GL_LINK_STATUS, &status);
  LOG_IF(ERROR, status == GL_FALSE) << "Failed to link program " << program;

  return status;
+#endif  // UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
}

GLint GlhValidateProgram(GLuint program) {
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

+load(
+    "//mediapipe/framework/tool:mediapipe_graph.bzl",
+    "mediapipe_binary_graph",
+)
+
licenses(["notice"])
@@ -24,8 +28,8 @@ cc_library(
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
        "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
-       "//mediapipe/modules/face_detection:face_detection_front_cpu",
-       "//mediapipe/modules/face_detection:face_detection_front_gpu",
+       "//mediapipe/modules/face_detection:face_detection_short_range_cpu",
+       "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
    ],
)
@@ -35,7 +39,7 @@ cc_library(
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
-       "//mediapipe/modules/face_detection:face_detection_front_cpu",
+       "//mediapipe/modules/face_detection:face_detection_short_range_cpu",
    ],
)
@@ -45,15 +49,10 @@ cc_library(
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
-       "//mediapipe/modules/face_detection:face_detection_front_gpu",
+       "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
    ],
)

-load(
-    "//mediapipe/framework/tool:mediapipe_graph.bzl",
-    "mediapipe_binary_graph",
-)
-
mediapipe_binary_graph(
    name = "face_detection_mobile_cpu_binary_graph",
    graph = "face_detection_mobile_cpu.pbtxt",
@@ -67,3 +66,30 @@ mediapipe_binary_graph(
    output_name = "face_detection_mobile_gpu.binarypb",
    deps = [":mobile_calculators"],
)
+
+cc_library(
+    name = "face_detection_full_range_mobile_gpu_deps",
+    deps = [
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/calculators/util:detections_to_render_data_calculator",
+        "//mediapipe/modules/face_detection:face_detection_full_range_gpu",
+    ],
+)
+
+mediapipe_binary_graph(
+    name = "face_detection_full_range_mobile_gpu_binary_graph",
+    graph = "face_detection_full_range_mobile_gpu.pbtxt",
+    output_name = "face_detection_full_range_mobile_gpu.binarypb",
+    deps = [":face_detection_full_range_mobile_gpu_deps"],
+)
+
+cc_library(
+    name = "face_detection_full_range_desktop_live_deps",
+    deps = [
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/calculators/util:detections_to_render_data_calculator",
+        "//mediapipe/modules/face_detection:face_detection_full_range_cpu",
+    ],
+)
@@ -1,169 +0,0 @@
-# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
-# Used in the examples in
-# mediapipe/examples/desktop/face_detection:face_detection_cpu.
-
-# Images on GPU coming into and out of the graph.
-input_stream: "input_video"
-output_stream: "output_video"
-
-# Throttles the images flowing downstream for flow control. It passes through
-# the very first incoming image unaltered, and waits for
-# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
-# generating the corresponding detections before it passes through another
-# image. All images that come in while waiting are dropped, limiting the number
-# of in-flight images between this calculator and
-# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
-# from queuing up incoming images and data excessively, which leads to increased
-# latency and memory usage, unwanted in real-time mobile applications. It also
-# eliminates unnecessarily computation, e.g., a transformed image produced by
-# ImageTransformationCalculator may get dropped downstream if the subsequent
-# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
-# processing previous inputs.
-node {
-  calculator: "FlowLimiterCalculator"
-  input_stream: "input_video"
-  input_stream: "FINISHED:detections"
-  input_stream_info: {
-    tag_index: "FINISHED"
-    back_edge: true
-  }
-  output_stream: "throttled_input_video"
-}
-
-# Transforms the input image on CPU to a 128x128 image. To scale the input
-# image, the scale_mode option is set to FIT to preserve the aspect ratio,
-# resulting in potential letterboxing in the transformed image.
-node: {
-  calculator: "ImageTransformationCalculator"
-  input_stream: "IMAGE:throttled_input_video"
-  output_stream: "IMAGE:transformed_input_video_cpu"
-  output_stream: "LETTERBOX_PADDING:letterbox_padding"
-  node_options: {
-    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
-      output_width: 192
-      output_height: 192
-      scale_mode: FIT
-    }
-  }
-}
-
-# Converts the transformed input image on CPU into an image tensor stored as a
-# TfLiteTensor.
-node {
-  calculator: "TfLiteConverterCalculator"
-  input_stream: "IMAGE:transformed_input_video_cpu"
-  output_stream: "TENSORS:image_tensor"
-}
-
-# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
-# vector of tensors representing, for instance, detection boxes/keypoints and
-# scores.
-node {
-  calculator: "TfLiteInferenceCalculator"
-  input_stream: "TENSORS:image_tensor"
-  output_stream: "TENSORS:detection_tensors"
-  node_options: {
-    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
-      model_path: "mediapipe/modules/face_detection/face_detection_back.tflite"
-    }
-  }
-}
-
-# Generates a single side packet containing a vector of SSD anchors based on
-# the specification in the options.
-node {
-  calculator: "SsdAnchorsCalculator"
-  output_side_packet: "anchors"
-  node_options: {
-    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
-      num_layers: 1
-      min_scale: 0.1484375
-      max_scale: 0.75
-      input_size_height: 192
-      input_size_width: 192
-      anchor_offset_x: 0.5
-      anchor_offset_y: 0.5
-      strides: 4
-      aspect_ratios: 1.0
-      fixed_anchor_size: true
-      interpolated_scale_aspect_ratio: 0.0
-    }
-  }
-}
-
-# Decodes the detection tensors generated by the TensorFlow Lite model, based on
-# the SSD anchors and the specification in the options, into a vector of
-# detections. Each detection describes a detected object.
-node {
-  calculator: "TfLiteTensorsToDetectionsCalculator"
-  input_stream: "TENSORS:detection_tensors"
-  input_side_packet: "ANCHORS:anchors"
-  output_stream: "DETECTIONS:detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
-      num_classes: 1
-      num_boxes: 2304
-      num_coords: 16
-      box_coord_offset: 0
-      keypoint_coord_offset: 4
-      num_keypoints: 6
-      num_values_per_keypoint: 2
-      sigmoid_score: true
-      score_clipping_thresh: 100.0
-      reverse_output_order: true
-      x_scale: 192.0
-      y_scale: 192.0
-      h_scale: 192.0
-      w_scale: 192.0
-      min_score_thresh: 0.6
-    }
-  }
-}
-
-# Performs non-max suppression to remove excessive detections.
-node {
-  calculator: "NonMaxSuppressionCalculator"
-  input_stream: "detections"
-  output_stream: "filtered_detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
-      min_suppression_threshold: 0.3
-      overlap_type: INTERSECTION_OVER_UNION
-      algorithm: WEIGHTED
-      return_empty_detections: true
-    }
-  }
-}
-
-# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
-# letterboxed image (after image transformation with the FIT scale mode) to the
-# corresponding locations on the same image with the letterbox removed (the
-# input image to the graph before image transformation).
-node {
-  calculator: "DetectionLetterboxRemovalCalculator"
-  input_stream: "DETECTIONS:filtered_detections"
-  input_stream: "LETTERBOX_PADDING:letterbox_padding"
-  output_stream: "DETECTIONS:output_detections"
-}
-
-# Converts the detections to drawing primitives for annotation overlay.
-node {
-  calculator: "DetectionsToRenderDataCalculator"
-  input_stream: "DETECTIONS:output_detections"
-  output_stream: "RENDER_DATA:render_data"
-  node_options: {
-    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
-      thickness: 4.0
-      color { r: 255 g: 0 b: 0 }
-    }
-  }
-}
-
-# Draws annotations and overlays them on top of the input images.
-node {
-  calculator: "AnnotationOverlayCalculator"
-  input_stream: "IMAGE:throttled_input_video"
-  input_stream: "render_data"
-  output_stream: "IMAGE:output_video"
-}
@@ -1,169 +0,0 @@
-# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
-# Used in the examples in
-# mediapipie/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
-# mediapipie/examples/ios/facedetectiongpu.
-
-# Images on GPU coming into and out of the graph.
-input_stream: "input_video"
-output_stream: "output_video"
-
-# Throttles the images flowing downstream for flow control. It passes through
-# the very first incoming image unaltered, and waits for
-# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
-# generating the corresponding detections before it passes through another
-# image. All images that come in while waiting are dropped, limiting the number
-# of in-flight images between this calculator and
-# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
-# from queuing up incoming images and data excessively, which leads to increased
-# latency and memory usage, unwanted in real-time mobile applications. It also
-# eliminates unnecessarily computation, e.g., a transformed image produced by
-# ImageTransformationCalculator may get dropped downstream if the subsequent
-# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
-# processing previous inputs.
-node {
-  calculator: "FlowLimiterCalculator"
-  input_stream: "input_video"
-  input_stream: "FINISHED:detections"
-  input_stream_info: {
-    tag_index: "FINISHED"
-    back_edge: true
-  }
-  output_stream: "throttled_input_video"
-}
-
-# Transforms the input image on GPU to a 128x128 image. To scale the input
-# image, the scale_mode option is set to FIT to preserve the aspect ratio,
-# resulting in potential letterboxing in the transformed image.
-node: {
-  calculator: "ImageTransformationCalculator"
-  input_stream: "IMAGE_GPU:throttled_input_video"
-  output_stream: "IMAGE_GPU:transformed_input_video"
-  output_stream: "LETTERBOX_PADDING:letterbox_padding"
-  node_options: {
-    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
-      output_width: 192
-      output_height: 192
-      scale_mode: FIT
-    }
-  }
-}
-
-# Converts the transformed input image on GPU into an image tensor stored as a
-# TfLiteTensor.
-node {
-  calculator: "TfLiteConverterCalculator"
-  input_stream: "IMAGE_GPU:transformed_input_video"
-  output_stream: "TENSORS_GPU:image_tensor"
-}
-
-# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
-# vector of tensors representing, for instance, detection boxes/keypoints and
-# scores.
-node {
-  calculator: "TfLiteInferenceCalculator"
-  input_stream: "TENSORS_GPU:image_tensor"
-  output_stream: "TENSORS_GPU:detection_tensors"
-  node_options: {
-    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
-      model_path: "mediapipe/modules/face_detection/face_detection_back.tflite"
-    }
-  }
-}
-
-# Generates a single side packet containing a vector of SSD anchors based on
-# the specification in the options.
-node {
-  calculator: "SsdAnchorsCalculator"
-  output_side_packet: "anchors"
-  node_options: {
-    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
-      num_layers: 1
-      min_scale: 0.1484375
-      max_scale: 0.75
-      input_size_height: 192
-      input_size_width: 192
-      anchor_offset_x: 0.5
-      anchor_offset_y: 0.5
-      strides: 4
-      aspect_ratios: 1.0
-      fixed_anchor_size: true
-      interpolated_scale_aspect_ratio: 0.0
-    }
-  }
-}
-
-# Decodes the detection tensors generated by the TensorFlow Lite model, based on
-# the SSD anchors and the specification in the options, into a vector of
-# detections. Each detection describes a detected object.
-node {
-  calculator: "TfLiteTensorsToDetectionsCalculator"
-  input_stream: "TENSORS_GPU:detection_tensors"
-  input_side_packet: "ANCHORS:anchors"
-  output_stream: "DETECTIONS:detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
-      num_classes: 1
-      num_boxes: 2304
-      num_coords: 16
-      box_coord_offset: 0
-      keypoint_coord_offset: 4
-      num_keypoints: 6
-      num_values_per_keypoint: 2
-      sigmoid_score: true
-      score_clipping_thresh: 100.0
-      reverse_output_order: true
-      x_scale: 192.0
-      y_scale: 192.0
-      h_scale: 192.0
-      w_scale: 192.0
-      min_score_thresh: 0.6
-    }
-  }
-}
-
-# Performs non-max suppression to remove excessive detections.
-node {
-  calculator: "NonMaxSuppressionCalculator"
-  input_stream: "detections"
-  output_stream: "filtered_detections"
-  node_options: {
-    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
-      min_suppression_threshold: 0.3
-      overlap_type: INTERSECTION_OVER_UNION
-      algorithm: WEIGHTED
-      return_empty_detections: true
-    }
-  }
-}
-
-# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
-# letterboxed image (after image transformation with the FIT scale mode) to the
-# corresponding locations on the same image with the letterbox removed (the
-# input image to the graph before image transformation).
-node {
-  calculator: "DetectionLetterboxRemovalCalculator"
-  input_stream: "DETECTIONS:filtered_detections"
-  input_stream: "LETTERBOX_PADDING:letterbox_padding"
-  output_stream: "DETECTIONS:output_detections"
-}
-
-# Converts the detections to drawing primitives for annotation overlay.
-node {
-  calculator: "DetectionsToRenderDataCalculator"
-  input_stream: "DETECTIONS:output_detections"
-  output_stream: "RENDER_DATA:render_data"
-  node_options: {
-    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
-      thickness: 4.0
-      color { r: 255 g: 0 b: 0 }
-    }
-  }
-}
-
-# Draws annotations and overlays them on top of the input images.
-node {
-  calculator: "AnnotationOverlayCalculator"
-  input_stream: "IMAGE_GPU:throttled_input_video"
-  input_stream: "render_data"
-  output_stream: "IMAGE_GPU:output_video"
-}
@@ -31,7 +31,7 @@ node {

# Subgraph that detects faces.
node {
-  calculator: "FaceDetectionFrontCpu"
+  calculator: "FaceDetectionShortRangeCpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:face_detections"
}
@@ -0,0 +1,60 @@
+# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
+# Used in the examples in
+# mediapipe/examples/desktop/face_detection:face_detection_cpu.
+
+# Images on GPU coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for
+# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
+# generating the corresponding detections before it passes through another
+# image. All images that come in while waiting are dropped, limiting the number
+# of in-flight images between this calculator and
+# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
+# from queuing up incoming images and data excessively, which leads to increased
+# latency and memory usage, unwanted in real-time mobile applications. It also
+# eliminates unnecessarily computation, e.g., a transformed image produced by
+# ImageTransformationCalculator may get dropped downstream if the subsequent
+# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
+# processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:detections"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionFullRangeCpu"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:detections"
+}
+
+# Converts the detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:detections"
+  output_stream: "RENDER_DATA:render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 255 g: 0 b: 0 }
+    }
+  }
+}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "render_data"
+  output_stream: "IMAGE:output_video"
+}
@@ -0,0 +1,60 @@
+# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
+# Used in the examples in
+# mediapipie/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
+# mediapipie/examples/ios/facedetectiongpu.
+
+# Images on GPU coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for
+# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
+# generating the corresponding detections before it passes through another
+# image. All images that come in while waiting are dropped, limiting the number
+# of in-flight images between this calculator and
+# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
+# from queuing up incoming images and data excessively, which leads to increased
+# latency and memory usage, unwanted in real-time mobile applications. It also
+# eliminates unnecessarily computation, e.g., a transformed image produced by
+# ImageTransformationCalculator may get dropped downstream if the subsequent
+# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
+# processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionFullRangeGpu"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:detections"
+}
+
+# Converts the detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:detections"
+  output_stream: "RENDER_DATA:render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 255 g: 0 b: 0 }
+    }
+  }
+}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  input_stream: "render_data"
+  output_stream: "IMAGE_GPU:output_video"
+}
@@ -41,7 +41,7 @@ node: {

# Subgraph that detects faces.
node {
-  calculator: "FaceDetectionFrontCpu"
+  calculator: "FaceDetectionShortRangeCpu"
  input_stream: "IMAGE:input_video_cpu"
  output_stream: "DETECTIONS:face_detections"
}
@@ -31,7 +31,7 @@ node {

# Subgraph that detects faces.
node {
-  calculator: "FaceDetectionFrontGpu"
+  calculator: "FaceDetectionShortRangeGpu"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:face_detections"
}
@@ -39,7 +39,7 @@ mediapipe_simple_subgraph(
        "//mediapipe/calculators/core:concatenate_detection_vector_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
-       "//mediapipe/modules/face_detection:face_detection_front_gpu",
+       "//mediapipe/modules/face_detection:face_detection_short_range_gpu",
        "//mediapipe/modules/face_geometry:face_geometry_from_detection",
    ],
)
@@ -24,7 +24,7 @@ output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Subgraph that detects faces and corresponding landmarks using the face
# detection pipeline.
node {
-  calculator: "FaceDetectionFrontGpu"
+  calculator: "FaceDetectionShortRangeGpu"
  input_stream: "IMAGE:input_image"
  output_stream: "DETECTIONS:multi_face_detection"
}
@@ -24,7 +24,7 @@ package(default_visibility = ["//visibility:public"])
cc_library(
    name = "renderer_calculators",
    deps = [
-       "//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
+       "//mediapipe/calculators/core:split_landmarks_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
@@ -30,7 +30,7 @@ mediapipe_simple_subgraph(
        "//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
        "//mediapipe/calculators/core:concatenate_vector_calculator",
        "//mediapipe/calculators/core:merge_calculator",
-       "//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
+       "//mediapipe/calculators/core:split_landmarks_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
@ -26,7 +26,7 @@ cc_library(
|
||||||
deps = [
|
deps = [
|
||||||
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
|
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
|
||||||
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
"//mediapipe/calculators/core:concatenate_vector_calculator",
|
||||||
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
|
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
|
||||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
|
|
|
@ -26,7 +26,7 @@ mediapipe_simple_subgraph(
|
||||||
graph = "pose_renderer_gpu.pbtxt",
|
graph = "pose_renderer_gpu.pbtxt",
|
||||||
register_as = "PoseRendererGpu",
|
register_as = "PoseRendererGpu",
|
||||||
deps = [
|
deps = [
|
||||||
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
|
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||||
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
"//mediapipe/calculators/util:annotation_overlay_calculator",
|
||||||
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
"//mediapipe/calculators/util:detections_to_render_data_calculator",
|
||||||
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
|
||||||
|
@@ -40,7 +40,7 @@ mediapipe_simple_subgraph(
    graph = "pose_renderer_cpu.pbtxt",
    register_as = "PoseRendererCpu",
    deps = [
-       "//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
+       "//mediapipe/calculators/core:split_landmarks_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/calculators/util:landmarks_to_render_data_calculator",
@@ -91,15 +91,15 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
    GLES20.glViewport(0, 0, width, height);
  }

-  @Override
-  public void onDrawFrame(GL10 gl) {
+  /** Renders the frame. Note that the {@link #flush} method must be called afterwards. */
+  protected TextureFrame renderFrame() {
    TextureFrame frame = nextFrame.getAndSet(null);

    GLES20.glClear(GLES20.GL_COLOR_BUFFER_BIT);
    ShaderUtil.checkGlError("glClear");

    if (surfaceTexture == null && frame == null) {
-      return;
+      return null;
    }

    GLES20.glActiveTexture(GLES20.GL_TEXTURE0);
@@ -161,14 +161,28 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
    GLES20.glBindTexture(textureTarget, 0);
    ShaderUtil.checkGlError("unbind surfaceTexture");

-    // We must flush before releasing the frame.
-    GLES20.glFlush();
+    return frame;
+  }
+
+  /**
+   * Calls {@link #GLES20.glFlush} and releases the texture frame. Should be invoked after the
+   * {@link #renderFrame} method is called.
+   *
+   * @param frame the {@link TextureFrame} to be released after {@link #GLES20.glFlush}.
+   */
+  protected void flush(TextureFrame frame) {
+    GLES20.glFlush();
    if (frame != null) {
      frame.release();
    }
  }

+  @Override
+  public void onDrawFrame(GL10 gl) {
+    TextureFrame frame = renderFrame();
+    flush(frame);
+  }
+
  public void setTextureTarget(int target) {
    if (program != 0) {
      throw new IllegalStateException(
@@ -16,7 +16,6 @@ package com.google.mediapipe.framework;

import android.graphics.Bitmap;
import java.nio.ByteBuffer;
-import java.util.List;

// TODO: use Preconditions in this file.
/**
@@ -19,7 +19,7 @@ package com.google.mediapipe.framework;
 * has reached the specified point in the sequence of commands it is executing. This can be
 * necessary when working with multiple GL contexts.
 */
-final class GraphGlSyncToken implements GlSyncToken {
+public final class GraphGlSyncToken implements GlSyncToken {
  private long token;

  @Override
@@ -44,7 +44,7 @@ final class GraphGlSyncToken implements GlSyncToken {
    }
  }

-  GraphGlSyncToken(long token) {
+  public GraphGlSyncToken(long token) {
    this.token = token;
  }
@@ -73,6 +73,7 @@ cc_library(
        ],
        "//mediapipe/gpu:disable_gpu": [],
    }),
+   features = ["-no_undefined"],
    linkopts = select({
        "//conditions:default": [],
        "//mediapipe:android": [
@@ -583,9 +583,9 @@ absl::Status Graph::SetParentGlContext(int64 java_gl_context) {
#if MEDIAPIPE_DISABLE_GPU
  LOG(FATAL) << "GPU support has been disabled in this build!";
#else
-  gpu_resources_ = mediapipe::GpuResources::Create(
-                       reinterpret_cast<EGLContext>(java_gl_context))
-                       .value();
+  ASSIGN_OR_RETURN(gpu_resources_,
+                   mediapipe::GpuResources::Create(
+                       reinterpret_cast<EGLContext>(java_gl_context)));
#endif  // MEDIAPIPE_DISABLE_GPU
  return absl::OkStatus();
}
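The change above replaces GpuResources::Create(...).value(), which would crash the process if creation failed, with ASSIGN_OR_RETURN, which propagates the error status to the caller. A rough standalone C++ sketch of that pattern, using a simplified stand-in macro (not MediaPipe's actual definition) and a hypothetical factory function:

#include <utility>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Simplified stand-in for ASSIGN_OR_RETURN; the real macro is more careful
// about temporary names and error decoration.
#define MY_ASSIGN_OR_RETURN(lhs, expr)           \
  do {                                           \
    auto status_or_value = (expr);               \
    if (!status_or_value.ok()) {                 \
      return status_or_value.status();           \
    }                                            \
    lhs = std::move(status_or_value).value();    \
  } while (0)

// Hypothetical factory used only for illustration.
absl::StatusOr<int> MakeResource() { return 42; }

absl::Status UseResource() {
  int resource = 0;
  // With .value(), a failed StatusOr terminates the process; the macro
  // instead returns the error status to the caller.
  MY_ASSIGN_OR_RETURN(resource, MakeResource());
  return resource > 0 ? absl::OkStatus()
                      : absl::InternalError("unexpected resource");
}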
@@ -46,6 +46,7 @@ load("@build_bazel_rules_android//android:rules.bzl", "android_binary", "android
def mediapipe_aar(
        name,
        srcs = [],
+       gen_libmediapipe = True,
        calculators = [],
        assets = [],
        assets_dir = ""):
|
||||||
Args:
|
Args:
|
||||||
name: the name of the aar.
|
name: the name of the aar.
|
||||||
srcs: the additional java source code to be added into the android library.
|
srcs: the additional java source code to be added into the android library.
|
||||||
|
gen_libmediapipe: whether to generate libmediapipe_jni.so. Default to True.
|
||||||
calculators: the calculator libraries to be compiled into the jni library.
|
calculators: the calculator libraries to be compiled into the jni library.
|
||||||
assets: additional assets to be included into the archive.
|
assets: additional assets to be included into the archive.
|
||||||
assets_dir: path where the assets will the packaged.
|
assets_dir: path where the assets will the packaged.
|
||||||
"""
|
"""
|
||||||
_mediapipe_jni(
|
_mediapipe_jni(
|
||||||
name = name + "_jni",
|
name = name + "_jni",
|
||||||
|
gen_libmediapipe = gen_libmediapipe,
|
||||||
calculators = calculators,
|
calculators = calculators,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -67,6 +70,22 @@ def mediapipe_aar(
|
||||||
name = name + "_proto",
|
name = name + "_proto",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
native.genrule(
|
||||||
|
name = name + "_aar_manifest_generator",
|
||||||
|
outs = ["AndroidManifest.xml"],
|
||||||
|
cmd = """
|
||||||
|
cat > $(OUTS) <<EOF
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||||
|
package="com.google.mediapipe">
|
||||||
|
<uses-sdk
|
||||||
|
android:minSdkVersion="21"
|
||||||
|
android:targetSdkVersion="27" />
|
||||||
|
</manifest>
|
||||||
|
EOF
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
|
||||||
android_library(
|
android_library(
|
||||||
name = name + "_android_lib",
|
name = name + "_android_lib",
|
||||||
srcs = srcs + [
|
srcs = srcs + [
|
||||||
|
@@ -84,7 +103,6 @@ def mediapipe_aar(
        proguard_specs = ["//mediapipe/java/com/google/mediapipe/framework:proguard.pgcfg"],
        deps = [
            ":" + name + "_jni_cc_lib",
-           ":" + name + "_jni_opencv_cc_lib",
            "//mediapipe/framework:calculator_java_proto_lite",
            "//mediapipe/framework:calculator_profile_java_proto_lite",
            "//mediapipe/framework:calculator_options_java_proto_lite",
@@ -94,6 +112,10 @@ def mediapipe_aar(
            "//mediapipe/framework:status_handler_java_proto_lite",
            "//mediapipe/framework:stream_handler_java_proto_lite",
            "//mediapipe/framework/tool:calculator_graph_template_java_proto_lite",
+           "//mediapipe/java/com/google/mediapipe/components:android_components",
+           "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
+           "//mediapipe/java/com/google/mediapipe/framework:android_framework",
+           "//mediapipe/java/com/google/mediapipe/glutil",
            "//third_party:androidx_annotation",
            "//third_party:androidx_appcompat",
            "//third_party:androidx_core",
@@ -108,7 +130,10 @@ def mediapipe_aar(
            "@maven//:com_google_flogger_flogger_system_backend",
            "@maven//:com_google_guava_guava",
            "@maven//:androidx_lifecycle_lifecycle_common",
-       ],
+       ] + select({
+           "//conditions:default": [":" + name + "_jni_opencv_cc_lib"],
+           "//mediapipe/framework/port:disable_opencv": [],
+       }),
        assets = assets,
        assets_dir = assets_dir,
    )
@@ -121,22 +146,6 @@ def _mediapipe_proto(name):
    Args:
      name: the name of the target.
    """
-   native.genrule(
-       name = name + "_aar_manifest_generator",
-       outs = ["AndroidManifest.xml"],
-       cmd = """
-cat > $(OUTS) <<EOF
-<?xml version="1.0" encoding="utf-8"?>
-<manifest xmlns:android="http://schemas.android.com/apk/res/android"
-    package="com.google.mediapipe">
-  <uses-sdk
-      android:minSdkVersion="21"
-      android:targetSdkVersion="27" />
-  <application />
-</manifest>
-""",
-   )
-
    _proto_java_src_generator(
        name = "calculator_proto",
        proto_src = "mediapipe/framework/calculator.proto",
@@ -204,21 +213,23 @@ def _proto_java_src_generator(name, proto_src, java_lite_out, srcs = []):
        ],
    )

-def _mediapipe_jni(name, calculators = []):
+def _mediapipe_jni(name, gen_libmediapipe, calculators = []):
    """Generates MediaPipe jni library.

    Args:
      name: the name of the target.
+     gen_libmediapipe: whether to generate libmediapipe_jni.so. Default to True.
      calculators: the calculator libraries to be compiled into the jni library.
    """
-   native.cc_binary(
-       name = "libmediapipe_jni.so",
-       linkshared = 1,
-       linkstatic = 1,
-       deps = [
-           "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
-       ] + calculators,
-   )
+   if gen_libmediapipe:
+       native.cc_binary(
+           name = "libmediapipe_jni.so",
+           linkshared = 1,
+           linkstatic = 1,
+           deps = [
+               "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
+           ] + calculators,
+       )

    native.cc_library(
        name = name + "_cc_lib",
@@ -22,6 +22,9 @@ android_library(
        ["*.java"],
        exclude = [
            "CameraInput.java",
+           "ResultGlRenderer.java",
+           "SolutionGlSurfaceView.java",
+           "SolutionGlSurfaceViewRenderer.java",
        ],
    ),
    visibility = ["//visibility:public"],
@@ -29,6 +32,7 @@ android_library(
        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
        "//mediapipe/java/com/google/mediapipe/glutil",
        "//third_party:autovalue",
+       "@com_google_protobuf//:protobuf_javalite",
        "@maven//:com_google_code_findbugs_jsr305",
        "@maven//:com_google_guava_guava",
    ],
@@ -46,6 +50,23 @@ android_library(
    ],
)

+android_library(
+    name = "solution_rendering",
+    srcs = [
+        "ResultGlRenderer.java",
+        "SolutionGlSurfaceView.java",
+        "SolutionGlSurfaceViewRenderer.java",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":solution_base",
+        "//mediapipe/java/com/google/mediapipe/components:android_components",
+        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
+        "//mediapipe/java/com/google/mediapipe/glutil",
+        "@maven//:com_google_guava_guava",
+    ],
+)
+
# Native dependencies of all MediaPipe solutions.
cc_binary(
    name = "libmediapipe_jni.so",
@@ -65,3 +86,11 @@ cc_library(
    visibility = ["//visibility:public"],
    alwayslink = 1,
)
+
+load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar")
+
+mediapipe_aar(
+    name = "solution_core",
+    srcs = glob(["*.java"]),
+    gen_libmediapipe = False,
+)
@@ -39,6 +39,9 @@ public class ImageSolutionResult implements SolutionResult {

  // Returns the corresponding input image as a {@link Bitmap}.
  public Bitmap inputBitmap() {
+    if (imagePacket == null) {
+      return null;
+    }
    if (cachedBitmap != null) {
      return cachedBitmap;
    }
@@ -49,6 +52,9 @@ public class ImageSolutionResult implements SolutionResult {
   // Returns the corresponding input image as a {@link TextureFrame}. The caller must release the
   // acquired {@link TextureFrame} after using.
   public TextureFrame acquireTextureFrame() {
+    if (imagePacket == null) {
+      return null;
+    }
     return PacketGetter.getTextureFrame(imagePacket);
   }

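The new null guards change the contract of both accessors: they now return null instead of failing when a result carries no image packet. Below is a minimal sketch of a caller that respects that contract together with the existing release requirement; the helper class and method names are illustrative only, not part of this commit.

```java
import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.solutionbase.ImageSolutionResult;

/** Sketch: honoring the acquire/release contract on an ImageSolutionResult. */
final class TextureFrameExample {
  static void useInputTexture(ImageSolutionResult result) {
    // acquireTextureFrame() now returns null when no image packet is attached.
    TextureFrame frame = result.acquireTextureFrame();
    if (frame == null) {
      return;
    }
    try {
      int textureName = frame.getTextureName();  // e.g. hand this to a GL renderer
      // ... draw with textureName ...
    } finally {
      frame.release();  // the caller must release the acquired TextureFrame
    }
  }
}
```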
mediapipe/java/com/google/mediapipe/solutionbase/ResultGlRenderer.java (new file)
@@ -0,0 +1,25 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutionbase;

/** Interface for the customizable MediaPipe solution result OpenGL renderer. */
public interface ResultGlRenderer<T extends ImageSolutionResult> {

  /** Sets up OpenGL rendering when the surface is created or recreated. */
  void setupRendering();

  /** Renders the solution result. */
  void renderResult(T result);
}
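For reference, a custom renderer only has to implement these two methods. The sketch below is illustrative and not part of the commit: the class name and the logging body are placeholders, and a real renderer would compile its GL program in setupRendering() and issue draw calls in renderResult().

```java
import android.util.Log;
import com.google.mediapipe.solutionbase.ResultGlRenderer;
import com.google.mediapipe.solutions.hands.HandsResult;

/** Illustrative-only ResultGlRenderer implementation for the Hands solution. */
class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
  private static final String TAG = "HandsResultGlRenderer";

  @Override
  public void setupRendering() {
    // Called on the GL thread when the surface is created or recreated.
    // Compile shaders and link the GL program here.
  }

  @Override
  public void renderResult(HandsResult result) {
    if (result == null) {
      return;
    }
    // Placeholder body: a real renderer would draw each hand's 21 landmarks.
    Log.d(TAG, "Rendering " + result.multiHandLandmarks().size() + " hand(s)");
  }
}
```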
mediapipe/java/com/google/mediapipe/solutionbase/SolutionBase.java
@@ -45,9 +45,7 @@ public class SolutionBase {
   protected final AtomicBoolean solutionGraphStarted = new AtomicBoolean(false);

   static {
-    // Load all native libraries needed by the app.
     System.loadLibrary("mediapipe_jni");
-    System.loadLibrary("opencv_java3");
   }

   /**
mediapipe/java/com/google/mediapipe/solutionbase/SolutionGlSurfaceView.java (new file)
@@ -0,0 +1,118 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutionbase;

import android.content.Context;
import android.opengl.GLES20;
import android.opengl.GLSurfaceView;
import android.util.Log;
import android.view.SurfaceHolder;
import android.view.View;
import com.google.mediapipe.glutil.EglManager;
import javax.microedition.khronos.egl.EGL10;
import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.egl.EGLContext;
import javax.microedition.khronos.egl.EGLDisplay;

/**
 * A simplified GlSurfaceView implementation for displaying MediaPipe Solution results.
 *
 * <p>Users need to provide a custom {@link ResultGlRenderer} via {@link
 * #setSolutionResultRenderer(ResultGlRenderer)} for rendering MediaPipe solution results. Set the
 * latest render data by calling {@link #setRenderData(ImageSolutionResult)} before invoking {@link
 * #requestRender}. By default, the solution renderer renders the input images. Call {@link
 * #setRenderInputImage(boolean)} to explicitly set whether the input images should be rendered or
 * not.
 */
public class SolutionGlSurfaceView<T extends ImageSolutionResult> extends GLSurfaceView {
  private static final String TAG = "SolutionGlSurfaceView";
  SolutionGlSurfaceViewRenderer<T> renderer = new SolutionGlSurfaceViewRenderer<>();

  /**
   * Sets a user-defined {@link ResultGlRenderer} for rendering MediaPipe solution results.
   *
   * @param resultRenderer a {@link ResultGlRenderer}.
   */
  public void setSolutionResultRenderer(ResultGlRenderer<T> resultRenderer) {
    renderer.setSolutionResultRenderer(resultRenderer);
  }

  /**
   * Sets the next textureframe and solution result to render.
   *
   * @param solutionResult a solution result object that contains the solution outputs and a
   *     textureframe.
   */
  public void setRenderData(T solutionResult) {
    renderer.setRenderData(solutionResult);
  }

  /** Sets whether the input image needs to be rendered. Defaults to true. */
  public void setRenderInputImage(boolean renderInputImage) {
    renderer.setRenderInputImage(renderInputImage);
  }

  /** Initializes SolutionGlSurfaceView with Android context, GL context, and GL version number. */
  public SolutionGlSurfaceView(Context context, EGLContext glContext, int glMajorVersion) {
    super(context);
    setEGLContextClientVersion(glMajorVersion);
    getHolder().addCallback(new HolderCallbacks());
    setEGLContextFactory(
        new GLSurfaceView.EGLContextFactory() {
          @Override
          public EGLContext createContext(EGL10 egl, EGLDisplay display, EGLConfig eglConfig) {
            int[] contextAttrs = {
              EglManager.EGL_CONTEXT_CLIENT_VERSION, glMajorVersion, EGL10.EGL_NONE
            };
            return egl.eglCreateContext(display, eglConfig, glContext, contextAttrs);
          }

          @Override
          public void destroyContext(EGL10 egl, EGLDisplay display, EGLContext context) {
            if (!egl.eglDestroyContext(display, context)) {
              throw new RuntimeException("eglDestroyContext failed");
            }
          }
        });
    renderer.setTextureTarget(GLES20.GL_TEXTURE_2D);
    super.setRenderer(renderer);
    setRenderMode(GLSurfaceView.RENDERMODE_WHEN_DIRTY);
    setVisibility(View.GONE);
  }

  private class HolderCallbacks implements SurfaceHolder.Callback {
    @Override
    public void surfaceCreated(SurfaceHolder holder) {
      Log.d(TAG, "main surfaceCreated");
    }

    @Override
    public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
      Log.d(
          TAG,
          String.format(
              "main surfaceChanged. width: %d height: %d glViewWidth: %d glViewHeight: %d",
              width,
              height,
              SolutionGlSurfaceView.this.getWidth(),
              SolutionGlSurfaceView.this.getHeight()));
    }

    @Override
    public void surfaceDestroyed(SurfaceHolder holder) {
      Log.d(TAG, "main surfaceDestroyed");
    }
  }
}
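A sketch of how an app might wire this view into a solution's result stream. It assumes the solution base class exposes getGlContext() and getGlMajorVersion() accessors, which are not shown in this diff, and it reuses the illustrative HandsResultGlRenderer from the earlier sketch.

```java
import android.app.Activity;
import android.view.View;
import android.widget.FrameLayout;
import com.google.mediapipe.solutionbase.SolutionGlSurfaceView;
import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsResult;

/** Sketch: attaching a SolutionGlSurfaceView to a Hands instance inside an Activity. */
final class GlViewSetup {
  static SolutionGlSurfaceView<HandsResult> attach(
      Activity activity, Hands hands, FrameLayout parent) {
    // Assumption: getGlContext()/getGlMajorVersion() come from the solution base class.
    SolutionGlSurfaceView<HandsResult> glSurfaceView =
        new SolutionGlSurfaceView<>(activity, hands.getGlContext(), hands.getGlMajorVersion());
    glSurfaceView.setSolutionResultRenderer(new HandsResultGlRenderer());  // illustrative renderer
    glSurfaceView.setRenderInputImage(true);
    hands.setResultListener(
        result -> {
          glSurfaceView.setRenderData(result);
          glSurfaceView.requestRender();  // RENDERMODE_WHEN_DIRTY: draw on demand only
        });
    parent.addView(glSurfaceView);
    glSurfaceView.setVisibility(View.VISIBLE);  // the view is constructed with visibility GONE
    return glSurfaceView;
  }
}
```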
mediapipe/java/com/google/mediapipe/solutionbase/SolutionGlSurfaceViewRenderer.java (new file)
@@ -0,0 +1,83 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutionbase;

import android.graphics.SurfaceTexture;
import com.google.mediapipe.components.GlSurfaceViewRenderer;
import com.google.mediapipe.framework.TextureFrame;
import java.util.concurrent.atomic.AtomicReference;
import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.opengles.GL10;

/**
 * MediaPipe Solution's GlSurfaceViewRenderer.
 *
 * <p>Users can provide a custom {@link ResultGlRenderer} for rendering MediaPipe solution results.
 * For setting the latest solution result, call {@link #setRenderData(ImageSolutionResult)}. By
 * default, the renderer renders the input images. Call {@link #setRenderInputImage(boolean)} to
 * explicitly set whether the input images should be rendered or not.
 */
public class SolutionGlSurfaceViewRenderer<T extends ImageSolutionResult>
    extends GlSurfaceViewRenderer {
  private static final String TAG = "SolutionGlSurfaceViewRenderer";
  private boolean renderInputImage = true;
  private final AtomicReference<T> nextSolutionResult = new AtomicReference<>();
  private ResultGlRenderer<T> resultGlRenderer;

  /** Sets whether the input image needs to be rendered. Defaults to true. */
  public void setRenderInputImage(boolean renderInputImage) {
    this.renderInputImage = renderInputImage;
  }

  /** Sets a user-defined {@link ResultGlRenderer} for rendering MediaPipe solution results. */
  public void setSolutionResultRenderer(ResultGlRenderer<T> resultGlRenderer) {
    this.resultGlRenderer = resultGlRenderer;
  }

  /**
   * Sets the next textureframe and solution result to render.
   *
   * @param solutionResult a solution result object that contains the solution outputs and a
   *     textureframe.
   */
  public void setRenderData(T solutionResult) {
    setNextFrame(solutionResult.acquireTextureFrame());
    nextSolutionResult.getAndSet(solutionResult);
  }

  @Override
  public void onSurfaceCreated(GL10 gl, EGLConfig config) {
    super.onSurfaceCreated(gl, config);
    resultGlRenderer.setupRendering();
  }

  @Override
  public void onDrawFrame(GL10 gl) {
    TextureFrame frame = null;
    if (renderInputImage) {
      frame = renderFrame();
    }
    // Render the latest solution result, if one has been posted since the last draw.
    T solutionResult = nextSolutionResult.getAndSet(null);
    if (solutionResult != null) {
      resultGlRenderer.renderResult(solutionResult);
    }
    flush(frame);
  }

  @Override
  public void setSurfaceTexture(SurfaceTexture texture) {
    throw new IllegalStateException("SurfaceTexture should not be used in MediaPipe Solution.");
  }
}
mediapipe/java/com/google/mediapipe/solutions/hands/AndroidManifest.xml (new file)
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.google.mediapipe.solutions.hands">

  <uses-sdk android:minSdkVersion="21"
      android:targetSdkVersion="27" />

</manifest>
mediapipe/java/com/google/mediapipe/solutions/hands/BUILD (new file, 45 lines)
@@ -0,0 +1,45 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

licenses(["notice"])

android_library(
    name = "hands",
    srcs = [
        "HandLandmark.java",
        "Hands.java",
        "HandsOptions.java",
        "HandsResult.java",
    ],
    assets = [
        "//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
        "//mediapipe/modules/hand_landmark:handedness.txt",
        "//mediapipe/modules/hand_landmark:hand_landmark.tflite",
        "//mediapipe/modules/palm_detection:palm_detection.tflite",
    ],
    assets_dir = "",
    javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],
    manifest = ":AndroidManifest.xml",
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework/formats:classification_java_proto_lite",
        "//mediapipe/framework/formats:landmark_java_proto_lite",
        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
        "//mediapipe/java/com/google/mediapipe/solutionbase:solution_base",
        "//third_party:autovalue",
        "@maven//:androidx_annotation_annotation",
        "@maven//:com_google_code_findbugs_jsr305",
        "@maven//:com_google_guava_guava",
    ],
)
mediapipe/java/com/google/mediapipe/solutions/hands/HandLandmark.java (new file)
@@ -0,0 +1,72 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutions.hands;

import androidx.annotation.IntDef;

/** The 21 hand landmarks. */
public final class HandLandmark {
  public static final int NUM_LANDMARKS = 21;

  public static final int WRIST = 0;
  public static final int THUMB_CMC = 1;
  public static final int THUMB_MCP = 2;
  public static final int THUMB_DIP = 3;
  public static final int THUMB_TIP = 4;
  public static final int INDEX_FINGER_MCP = 5;
  public static final int INDEX_FINGER_PIP = 6;
  public static final int INDEX_FINGER_DIP = 7;
  public static final int INDEX_FINGER_TIP = 8;
  public static final int MIDDLE_FINGER_MCP = 9;
  public static final int MIDDLE_FINGER_PIP = 10;
  public static final int MIDDLE_FINGER_DIP = 11;
  public static final int MIDDLE_FINGER_TIP = 12;
  public static final int RING_FINGER_MCP = 13;
  public static final int RING_FINGER_PIP = 14;
  public static final int RING_FINGER_DIP = 15;
  public static final int RING_FINGER_TIP = 16;
  public static final int PINKY_MCP = 17;
  public static final int PINKY_PIP = 18;
  public static final int PINKY_DIP = 19;
  public static final int PINKY_TIP = 20;

  /** Represents a hand landmark type. */
  @IntDef({
    WRIST,
    THUMB_CMC,
    THUMB_MCP,
    THUMB_DIP,
    THUMB_TIP,
    INDEX_FINGER_MCP,
    INDEX_FINGER_PIP,
    INDEX_FINGER_DIP,
    INDEX_FINGER_TIP,
    MIDDLE_FINGER_MCP,
    MIDDLE_FINGER_PIP,
    MIDDLE_FINGER_DIP,
    MIDDLE_FINGER_TIP,
    RING_FINGER_MCP,
    RING_FINGER_PIP,
    RING_FINGER_DIP,
    RING_FINGER_TIP,
    PINKY_MCP,
    PINKY_PIP,
    PINKY_DIP,
    PINKY_TIP,
  })
  public @interface HandLandmarkType {}

  private HandLandmark() {}
}
mediapipe/java/com/google/mediapipe/solutions/hands/Hands.java (new file, 132 lines)
@@ -0,0 +1,132 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutions.hands;

import android.content.Context;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutionbase.ErrorListener;
import com.google.mediapipe.solutionbase.ImageSolutionBase;
import com.google.mediapipe.solutionbase.OutputHandler;
import com.google.mediapipe.solutionbase.ResultListener;
import com.google.mediapipe.solutionbase.SolutionInfo;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * MediaPipe Hands Solution API.
 *
 * <p>MediaPipe Hands processes a {@link TextureFrame} or a {@link Bitmap} and returns the hand
 * landmarks and handedness (left vs. right hand) of each detected hand. Please refer to
 * https://solutions.mediapipe.dev/hands#android-solution-api for usage examples.
 */
public class Hands extends ImageSolutionBase {
  private static final String TAG = "Hands";

  private static final String NUM_HANDS = "num_hands";
  private static final String SOLUTION_GRAPH_NAME = "hand_landmark_tracking_gpu_image.binarypb";
  private static final String IMAGE_INPUT_STREAM = "image";
  private static final ImmutableList<String> OUTPUT_STREAMS =
      ImmutableList.of("multi_hand_landmarks", "multi_handedness", "image");
  private static final int LANDMARKS_INDEX = 0;
  private static final int HANDEDNESS_INDEX = 1;
  private static final int INPUT_IMAGE_INDEX = 2;
  private final OutputHandler<HandsResult> graphOutputHandler;

  /**
   * Initializes MediaPipe Hands solution.
   *
   * @param context an Android {@link Context}.
   * @param options the configuration options defined in {@link HandsOptions}.
   */
  public Hands(Context context, HandsOptions options) {
    graphOutputHandler = new OutputHandler<>();
    graphOutputHandler.setOutputConverter(
        packets -> {
          HandsResult.Builder handsResultBuilder = HandsResult.builder();
          try {
            handsResultBuilder.setMultiHandLandmarks(
                getProtoVector(packets.get(LANDMARKS_INDEX), NormalizedLandmarkList.parser()));
          } catch (MediaPipeException e) {
            throwException("Error occurs when getting MediaPipe hand landmarks. ", e);
          }
          try {
            handsResultBuilder.setMultiHandedness(
                getProtoVector(packets.get(HANDEDNESS_INDEX), Classification.parser()));
          } catch (MediaPipeException e) {
            throwException("Error occurs when getting MediaPipe handedness data. ", e);
          }
          return handsResultBuilder
              .setImagePacket(packets.get(INPUT_IMAGE_INDEX))
              .setTimestamp(
                  staticImageMode ? Long.MIN_VALUE : packets.get(INPUT_IMAGE_INDEX).getTimestamp())
              .build();
        });

    SolutionInfo solutionInfo =
        SolutionInfo.builder()
            .setBinaryGraphPath(SOLUTION_GRAPH_NAME)
            .setImageInputStreamName(IMAGE_INPUT_STREAM)
            .setOutputStreamNames(OUTPUT_STREAMS)
            .setStaticImageMode(options.mode() == HandsOptions.STATIC_IMAGE_MODE)
            .build();

    initialize(context, solutionInfo, graphOutputHandler);
    Map<String, Packet> inputSidePackets = new HashMap<>();
    inputSidePackets.put(NUM_HANDS, packetCreator.createInt32(options.maxNumHands()));
    start(inputSidePackets);
  }

  /**
   * Sets a callback to be invoked when the HandsResults become available.
   *
   * @param listener the {@link ResultListener} callback.
   */
  public void setResultListener(ResultListener<HandsResult> listener) {
    this.graphOutputHandler.setResultListener(listener);
  }

  /**
   * Sets a callback to be invoked when the Hands solution throws errors.
   *
   * @param listener the {@link ErrorListener} callback.
   */
  public void setErrorListener(@Nullable ErrorListener listener) {
    this.graphOutputHandler.setErrorListener(listener);
    this.errorListener = listener;
  }

  /**
   * Gets a specific hand landmark by hand index and hand landmark type.
   *
   * @param result the returned {@link HandsResult} object.
   * @param handIndex the hand index. The hand landmark lists are sorted by the confidence score.
   * @param landmarkType the hand landmark type defined in {@link HandLandmark}.
   */
  public static NormalizedLandmark getHandLandmark(
      HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
    if (result == null
        || handIndex >= result.multiHandLandmarks().size()
        || landmarkType >= HandLandmark.NUM_LANDMARKS) {
      return NormalizedLandmark.getDefaultInstance();
    }
    return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
  }
}
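A usage sketch for the new API in static image mode. The send(Bitmap) entry point and the ErrorListener signature are assumed to come from the image solution base class rather than shown in this hunk, so treat them as assumptions; the option values are arbitrary.

```java
import android.app.Activity;
import android.graphics.Bitmap;
import android.util.Log;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.hands.HandLandmark;
import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsOptions;

/** Sketch: run Hands on a single Bitmap in static image mode. */
final class HandsStaticImageExample {
  static void detect(Activity activity, Bitmap bitmap) {
    Hands hands =
        new Hands(
            activity,
            HandsOptions.builder()
                .setMode(HandsOptions.STATIC_IMAGE_MODE)
                .setMaxNumHands(2)
                .setMinDetectionConfidence(0.7f)
                .setMinTrackingConfidence(0.5f)
                .build());
    // Assumption: ErrorListener is a (String message, RuntimeException e) callback.
    hands.setErrorListener((message, e) -> Log.e("Hands", "MediaPipe Hands error: " + message));
    hands.setResultListener(
        result -> {
          NormalizedLandmark wrist = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
          Log.i("Hands", "First hand wrist at (" + wrist.getX() + ", " + wrist.getY() + ")");
        });
    // Assumption: the base class exposes a send(Bitmap) entry point for static images.
    hands.send(bitmap);
  }
}
```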
mediapipe/java/com/google/mediapipe/solutions/hands/HandsOptions.java (new file)
@@ -0,0 +1,77 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutions.hands;

import androidx.annotation.IntDef;
import com.google.auto.value.AutoValue;

/**
 * MediaPipe Hands solution-specific options.
 *
 * <p>mode: Whether to treat the input images as a batch of static and possibly unrelated images,
 * or a video stream. See details in https://solutions.mediapipe.dev/hands#static_image_mode.
 *
 * <p>maxNumHands: Maximum number of hands to detect. See details in
 * https://solutions.mediapipe.dev/hands#max_num_hands.
 *
 * <p>minDetectionConfidence: Minimum confidence value ([0.0, 1.0]) for hand detection to be
 * considered successful. See details in
 * https://solutions.mediapipe.dev/hands#min_detection_confidence.
 *
 * <p>minTrackingConfidence: Minimum confidence value ([0.0, 1.0]) for the hand landmarks to be
 * considered tracked successfully. See details in
 * https://solutions.mediapipe.dev/hands#min_tracking_confidence.
 */
@AutoValue
public abstract class HandsOptions {

  // TODO: Switch to use boolean variable.
  public static final int STREAMING_MODE = 1;
  public static final int STATIC_IMAGE_MODE = 2;

  /**
   * Indicates whether to treat the input images as a batch of static and possibly unrelated
   * images, or a video stream.
   */
  @IntDef({STREAMING_MODE, STATIC_IMAGE_MODE})
  public @interface Mode {}

  @Mode
  public abstract int mode();

  public abstract int maxNumHands();

  public abstract float minDetectionConfidence();

  public abstract float minTrackingConfidence();

  public static Builder builder() {
    return new AutoValue_HandsOptions.Builder();
  }

  /** Builder for {@link HandsOptions}. */
  @AutoValue.Builder
  public abstract static class Builder {
    public abstract Builder setMode(int value);

    public abstract Builder setMaxNumHands(int value);

    public abstract Builder setMinDetectionConfidence(float value);

    public abstract Builder setMinTrackingConfidence(float value);

    public abstract HandsOptions build();
  }
}
mediapipe/java/com/google/mediapipe/solutions/hands/HandsResult.java (new file)
@@ -0,0 +1,81 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.mediapipe.solutions.hands;

import android.graphics.Bitmap;
import com.google.auto.value.AutoBuilder;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.solutionbase.ImageSolutionResult;
import java.util.List;

/**
 * HandsResult contains a collection of detected/tracked hands, a collection of handedness of the
 * detected/tracked hands, and the input {@link Bitmap} or {@link TextureFrame}. If not in static
 * image mode, the timestamp field will be set to the timestamp of the corresponding input image.
 */
public class HandsResult extends ImageSolutionResult {
  private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks;
  private final ImmutableList<Classification> multiHandedness;

  HandsResult(
      ImmutableList<NormalizedLandmarkList> multiHandLandmarks,
      ImmutableList<Classification> multiHandedness,
      Packet imagePacket,
      long timestamp) {
    this.multiHandLandmarks = multiHandLandmarks;
    this.multiHandedness = multiHandedness;
    this.timestamp = timestamp;
    this.imagePacket = imagePacket;
  }

  // Collection of detected/tracked hands, where each hand is represented as a list of 21 hand
  // landmarks and each landmark is composed of x, y and z. x and y are normalized to [0.0, 1.0] by
  // the image width and height respectively. z represents the landmark depth with the depth at the
  // wrist being the origin, and the smaller the value the closer the landmark is to the camera.
  // The magnitude of z uses roughly the same scale as x.
  public ImmutableList<NormalizedLandmarkList> multiHandLandmarks() {
    return multiHandLandmarks;
  }

  // Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each
  // hand is composed of label and score. label is a string of value either "Left" or "Right".
  // score is the estimated probability of the predicted handedness and is always greater than or
  // equal to 0.5 (and the opposite handedness has an estimated probability of 1 - score).
  public ImmutableList<Classification> multiHandedness() {
    return multiHandedness;
  }

  public static Builder builder() {
    return new AutoBuilder_HandsResult_Builder();
  }

  /** Builder for {@link HandsResult}. */
  @AutoBuilder
  public abstract static class Builder {
    abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value);

    abstract Builder setMultiHandedness(List<Classification> value);

    abstract Builder setTimestamp(long value);

    abstract Builder setImagePacket(Packet value);

    abstract HandsResult build();
  }
}
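A short sketch of consuming a HandsResult, assuming (as the comments above describe) that multiHandLandmarks() and multiHandedness() are index-aligned per detected hand; the logger class is illustrative only.

```java
import android.util.Log;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.solutions.hands.HandsResult;

/** Sketch: reading the per-hand outputs carried by a HandsResult. */
final class HandsResultLogger {
  static void log(HandsResult result) {
    for (int i = 0; i < result.multiHandLandmarks().size(); ++i) {
      NormalizedLandmarkList landmarks = result.multiHandLandmarks().get(i);
      Classification handedness = result.multiHandedness().get(i);
      Log.i(
          "HandsResult",
          String.format(
              "Hand #%d: %s (score %.2f), %d landmarks",
              i, handedness.getLabel(), handedness.getScore(), landmarks.getLandmarkCount()));
    }
  }
}
```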
mediapipe/modules/face_detection/BUILD
@@ -22,11 +22,11 @@ licenses(["notice"])
 package(default_visibility = ["//visibility:public"])

 mediapipe_simple_subgraph(
-    name = "face_detection_front_by_roi_cpu",
+    name = "face_detection_short_range_by_roi_cpu",
-    graph = "face_detection_front_by_roi_cpu.pbtxt",
+    graph = "face_detection_short_range_by_roi_cpu.pbtxt",
-    register_as = "FaceDetectionFrontByRoiCpu",
+    register_as = "FaceDetectionShortRangeByRoiCpu",
     deps = [
-        ":face_detection_front_common",
+        ":face_detection_short_range_common",
         "//mediapipe/calculators/tensor:image_to_tensor_calculator",
         "//mediapipe/calculators/tensor:inference_calculator",
         "//mediapipe/calculators/util:to_image_calculator",
@@ -34,11 +34,11 @@ mediapipe_simple_subgraph(
 )

 mediapipe_simple_subgraph(
-    name = "face_detection_front_by_roi_gpu",
+    name = "face_detection_short_range_by_roi_gpu",
-    graph = "face_detection_front_by_roi_gpu.pbtxt",
+    graph = "face_detection_short_range_by_roi_gpu.pbtxt",
-    register_as = "FaceDetectionFrontByRoiGpu",
+    register_as = "FaceDetectionShortRangeByRoiGpu",
     deps = [
-        ":face_detection_front_common",
+        ":face_detection_short_range_common",
         "//mediapipe/calculators/tensor:image_to_tensor_calculator",
         "//mediapipe/calculators/tensor:inference_calculator",
         "//mediapipe/calculators/util:to_image_calculator",
@@ -46,11 +46,11 @@ mediapipe_simple_subgraph(
 )

 mediapipe_simple_subgraph(
-    name = "face_detection_front_cpu",
+    name = "face_detection_short_range_cpu",
-    graph = "face_detection_front_cpu.pbtxt",
+    graph = "face_detection_short_range_cpu.pbtxt",
-    register_as = "FaceDetectionFrontCpu",
+    register_as = "FaceDetectionShortRangeCpu",
     deps = [
-        ":face_detection_front_common",
+        ":face_detection_short_range_common",
         "//mediapipe/calculators/tensor:image_to_tensor_calculator",
         "//mediapipe/calculators/tensor:inference_calculator",
         "//mediapipe/calculators/util:to_image_calculator",
@@ -58,11 +58,11 @@ mediapipe_simple_subgraph(
 )

 mediapipe_simple_subgraph(
-    name = "face_detection_front_gpu",
+    name = "face_detection_short_range_gpu",
-    graph = "face_detection_front_gpu.pbtxt",
+    graph = "face_detection_short_range_gpu.pbtxt",
-    register_as = "FaceDetectionFrontGpu",
+    register_as = "FaceDetectionShortRangeGpu",
     deps = [
-        ":face_detection_front_common",
+        ":face_detection_short_range_common",
         "//mediapipe/calculators/tensor:image_to_tensor_calculator",
         "//mediapipe/calculators/tensor:inference_calculator",
         "//mediapipe/calculators/util:to_image_calculator",
@@ -70,9 +70,45 @@ mediapipe_simple_subgraph(
 )

 mediapipe_simple_subgraph(
-    name = "face_detection_front_common",
+    name = "face_detection_short_range_common",
-    graph = "face_detection_front_common.pbtxt",
+    graph = "face_detection_short_range_common.pbtxt",
-    register_as = "FaceDetectionFrontCommon",
+    register_as = "FaceDetectionShortRangeCommon",
+    deps = [
+        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
+        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
+        "//mediapipe/calculators/util:detection_projection_calculator",
+        "//mediapipe/calculators/util:non_max_suppression_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "face_detection_full_range_cpu",
+    graph = "face_detection_full_range_cpu.pbtxt",
+    register_as = "FaceDetectionFullRangeCpu",
+    deps = [
+        ":face_detection_full_range_common",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator",
+        "//mediapipe/calculators/util:to_image_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "face_detection_full_range_gpu",
+    graph = "face_detection_full_range_gpu.pbtxt",
+    register_as = "FaceDetectionFullRangeGpu",
+    deps = [
+        ":face_detection_full_range_common",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator",
+        "//mediapipe/calculators/util:to_image_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "face_detection_full_range_common",
+    graph = "face_detection_full_range_common.pbtxt",
+    register_as = "FaceDetectionFullRangeCommon",
     deps = [
         "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
         "//mediapipe/calculators/tflite:ssd_anchors_calculator",
@@ -83,8 +119,8 @@ mediapipe_simple_subgraph(

 exports_files(
     srcs = [
-        "face_detection_back.tflite",
+        "face_detection_full_range.tflite",
-        "face_detection_back_sparse.tflite",
+        "face_detection_full_range_sparse.tflite",
-        "face_detection_front.tflite",
+        "face_detection_short_range.tflite",
     ],
 )
mediapipe/modules/face_detection/README.md
@@ -2,6 +2,7 @@

 Subgraphs|Details
 :--- | :---
-[`FaceDetectionFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (CPU input, and inference is executed on CPU.)
+[`FaceDetectionFullRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (CPU input, and inference is executed on CPU.)
-[`FaceDetectionFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (GPU input, and inference is executed on GPU.)
+[`FaceDetectionFullRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (GPU input, and inference is executed on GPU.)
+[`FaceDetectionShortRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (CPU input, and inference is executed on CPU.)
+[`FaceDetectionShortRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (GPU input, and inference is executed on GPU.)
mediapipe/modules/face_detection/face_detection_full_range_common.pbtxt (new file)
@@ -0,0 +1,102 @@
# MediaPipe graph performing common processing to detect faces using
# face_detection_full_range_sparse.tflite model, currently consisting of tensor
# post processing.
#
# EXAMPLE:
# node {
#   calculator: "FaceDetectionFullRangeCommon"
#   input_stream: "TENSORS:detection_tensors"
#   input_stream: "MATRIX:transform_matrix"
#   output_stream: "DETECTIONS:detections"
# }

type: "FaceDetectionFullRangeCommon"

# Detection tensors. (std::vector<Tensor>)
input_stream: "TENSORS:detection_tensors"

# A 4x4 row-major-order matrix that maps a point represented in the detection
# tensors to a desired coordinate system, e.g., in the original input image
# before scaling/cropping. (std::array<float, 16>)
input_stream: "MATRIX:transform_matrix"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of faces detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 1
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 192
      input_size_width: 192
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 4
      aspect_ratios: 1.0
      fixed_anchor_size: true
      interpolated_scale_aspect_ratio: 0.0
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 2304
      num_coords: 16
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 6
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      x_scale: 192.0
      y_scale: 192.0
      h_scale: 192.0
      w_scale: 192.0
      min_score_thresh: 0.6
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
}

# Projects the detections from input tensor to the corresponding locations on
# the original image (input to the graph).
node {
  calculator: "DetectionProjectionCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "PROJECTION_MATRIX:transform_matrix"
  output_stream: "DETECTIONS:detections"
}
mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt (new file)
@@ -0,0 +1,79 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
#
# EXAMPLE:
# node {
#   calculator: "FaceDetectionFullRangeCpu"
#   input_stream: "IMAGE:image"
#   output_stream: "DETECTIONS:face_detections"
# }

type: "FaceDetectionFullRangeCpu"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of faces detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "IMAGE:multi_backend_image"
}

# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 192
      output_tensor_height: 192
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
    }
  }
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
      delegate { xnnpack {} }
    }
    #
  }
}

# Performs tensor post processing to generate face detections.
node {
  calculator: "FaceDetectionFullRangeCommon"
  input_stream: "TENSORS:detection_tensors"
  input_stream: "MATRIX:transform_matrix"
  output_stream: "DETECTIONS:detections"
}
mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt (new file)
@@ -0,0 +1,80 @@
# MediaPipe graph to detect faces. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
#
# EXAMPLE:
# node {
#   calculator: "FaceDetectionFullRangeGpu"
#   input_stream: "IMAGE:image"
#   output_stream: "DETECTIONS:face_detections"
# }

type: "FaceDetectionFullRangeGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of faces detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Converts the input GPU image (GpuBuffer) to the multi-backend image type
# (Image).
node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "IMAGE:multi_backend_image"
}

# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 192
      output_tensor_height: 192
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      gpu_origin: TOP_LEFT
    }
  }
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
      #
      delegate: { gpu { use_advanced_gpu_api: true } }
    }
  }
}

# Performs tensor post processing to generate face detections.
node {
  calculator: "FaceDetectionFullRangeCommon"
  input_stream: "TENSORS:detection_tensors"
  input_stream: "MATRIX:transform_matrix"
  output_stream: "DETECTIONS:detections"
}