Project import generated by Copybara.
GitOrigin-RevId: f7d09ed033907b893638a8eb4148efa11c0f09a6
@@ -7,5 +7,4 @@ include MANIFEST.in
include README.md
include requirements.txt

recursive-include mediapipe/modules *.tflite *.txt
recursive-include mediapipe/graphs *.binarypb
recursive-include mediapipe/modules *.tflite *.txt *.binarypb

README.md
@@ -35,9 +35,9 @@ Object Detection
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | ✅ | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅

@@ -53,6 +53,19 @@ See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.

## MediaPipe in Python

MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described in:

*   [MediaPipe Face Mesh](../solutions/face_mesh.md#python) and
    [colab](https://mediapipe.page.link/face_mesh_py_colab)
*   [MediaPipe Hands](../solutions/hands.md#python) and
    [colab](https://mediapipe.page.link/hands_py_colab)
*   [MediaPipe Pose](../solutions/pose.md#python) and
    [colab](https://mediapipe.page.link/pose_py_colab)

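As a quick check that the installation succeeded, here is a minimal sketch; it uses only the solution classes that the colabs above rely on:

```python
# Verify that the MediaPipe Python package imports and exposes the solutions.
import mediapipe as mp

print(mp.solutions.face_mesh.FaceMesh)
print(mp.solutions.hands.Hands)
print(mp.solutions.pose.Pose)
```
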
## MediaPipe on the Web

MediaPipe on the Web is an effort to run the same ML solutions built for mobile

@@ -364,9 +364,9 @@ http_archive(
)

# TensorFlow repo should always go after the other external dependencies.
# 2020-08-30
_TENSORFLOW_GIT_COMMIT = "57b009e31e59bd1a7ae85ef8c0232ed86c9b71db"
_TENSORFLOW_SHA256 = "de7f5f06204e057383028c7e53f3b352cdf85b3a40981b1a770c9a415a792c0e"
# 2020-10-30
_TENSORFLOW_GIT_COMMIT = "84384703c0d8b502e33ff6fd7eefd219dca5ff8e"
_TENSORFLOW_SHA256 = "23fb322fc15a20f7a7838d9a31f8b16f60700a494ea654311a0aa8621769df98"
http_archive(
    name = "org_tensorflow",
    urls = [

@@ -93,38 +93,40 @@ for app in ${apps}; do

  echo "=== Target: ${target}"

  if [[ $install_only == false ]]; then
    bazel_flags=("${default_bazel_flags[@]}")
    bazel_flags+=(${target})
    if [[ $strip == true ]]; then
      bazel_flags+=(--linkopt=-s)
    fi

    if [[ ${app_name} == "templatematchingcpu" ]]; then
      switch_to_opencv_4
    fi
    bazel "${bazel_flags[@]}"
    cp -f "${bin}" "${apk}"
    if [[ ${app_name} == "templatematchingcpu" ]]; then
      switch_to_opencv_3
    fi
  fi

  if [[ ${app_name} == "objectdetection3d" ]]; then
    orig_apk=${apk}
    apk="${out_dir}/${target_name}_shoes.apk"
    cp -f "${orig_apk}" "${apk}"
    apks+=(${apk})

    apk="${out_dir}/${target_name}_chairs.apk"
    categories=("shoe" "chair" "cup" "camera" "shoe_1stage" "chair_1stage")
    for category in ${categories[@]}; do
      apk="${out_dir}/${target_name}_${category}.apk"
      if [[ $install_only == false ]]; then
        bazel_flags_extended=("${bazel_flags[@]}")
        if [[ ${category} != "shoe" ]]; then
          bazel_flags_extended+=(--define ${category}=true)
        fi
        echo "bazel ${bazel_flags_extended[@]}"
        bazel "${bazel_flags_extended[@]}"
        cp -f "${bin}" "${apk}"
      fi
      apks+=(${apk})
    done
  else
    if [[ $install_only == false ]]; then
      bazel_flags+=(--define chair=true)
      bazel_flags=("${default_bazel_flags[@]}")
      bazel_flags+=(${target})
      if [[ $strip == true ]]; then
        bazel_flags+=(--linkopt=-s)
      fi

      if [[ ${app_name} == "templatematchingcpu" ]]; then
        switch_to_opencv_4
      fi
      bazel "${bazel_flags[@]}"
      cp -f "${bin}" "${apk}"
      if [[ ${app_name} == "templatematchingcpu" ]]; then
        switch_to_opencv_3
      fi
    fi
    apks+=(${apk})
  fi

  apks+=(${apk})
  fi
done

@@ -86,9 +86,7 @@ for app in ${apps}; do
    cp -f "${bin_dir}/${app}/"*"_cpu" "${out_dir}"
  fi
  if [[ $build_only == false ]]; then
    if [[ ${target_name} == "multi_hand_tracking" ]]; then
      graph_name="hand_tracking/multi_hand_tracking"
    elif [[ ${target_name} == "object_tracking" ]]; then
    if [[ ${target_name} == "object_tracking" ]]; then
      graph_name="tracking/object_detection_tracking"
    elif [[ ${target_name} == "upper_body_pose_tracking" ]]; then
      graph_name="pose_tracking/upper_body_pose_tracking"

@@ -135,6 +135,7 @@ each project.
    def camerax_version = "1.0.0-beta10"
    implementation "androidx.camera:camera-core:$camerax_version"
    implementation "androidx.camera:camera-camera2:$camerax_version"
    implementation "androidx.camera:camera-lifecycle:$camerax_version"
}
```

@@ -427,45 +427,13 @@ Note: This currently works only on Linux, and please first follow

MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below in
[Run in Python interpreter](#run-in-python-interpreter) and in this
[colab](https://mediapipe.page.link/mp-py-colab).
install mediapipe` on Linux and macOS, as described, for instance, in the
[Python section in MediaPipe Pose](../solutions/pose.md#python) and in this
[colab](https://mediapipe.page.link/pose_py_colab).

### Run in Python interpreter

Using [MediaPipe Pose](../solutions/pose.md) as an example:

```bash
# Activate a Python virtual environment.
$ python3 -m venv mp_env && source mp_env/bin/activate

# Install MediaPipe Python package
(mp_env)$ pip install mediapipe

# Run in Python interpreter
(mp_env)$ python3
>>> import mediapipe as mp
>>> pose_tracker = mp.examples.UpperBodyPoseTracker()

# For image input
>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')

# For live camera input
# (Press Esc within the output image window to stop the run, or let it self-terminate after 30 seconds.)
>>> pose_tracker.run_live()

# Close the tracker.
>>> pose_tracker.close()
```

Tip: Use command `deactivate` to exit the Python virtual environment.

### Building Python package from source

Follow these steps only if you have local changes and need to build the Python
package from source. Otherwise, we strongly encourage our users to simply run
`pip install mediapipe`, which is more convenient and much faster.
Follow the steps below only if you have local changes and need to build the
Python package from source. Otherwise, we strongly encourage our users to simply
run `pip install mediapipe`, which is more convenient and much faster.

1.  Make sure that Bazel and OpenCV are correctly installed and configured for
    MediaPipe. Please see [Installation](./install.md) for how to set up Bazel

@@ -12,7 +12,7 @@ nav_order: 1
{:toc}
---

Note: To interoperate with OpenCV, OpenCV 3.x and above are preferred. OpenCV
Note: To interoperate with OpenCV, OpenCV 3.x to 4.1 are preferred. OpenCV
2.x currently works but interoperability support may be deprecated in the
future.

BIN docs/images/mobile/objectron_camera_android_gpu.gif (new file; 889 KiB)
BIN (modified image; before: 2.5 MiB, after: 923 KiB)
BIN (modified image; before: 670 KiB, after: 744 KiB)
BIN docs/images/mobile/objectron_cup_android_gpu.gif (new file; 625 KiB)
BIN (modified image; before: 2.8 MiB, after: 968 KiB)
BIN docs/images/objectron_2stage_network_architecture.png (new file; 53 KiB)

@@ -35,9 +35,9 @@ Object Detection
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | ✅ | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅

@@ -53,6 +53,19 @@ See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.

## MediaPipe in Python

MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described in:

*   [MediaPipe Face Mesh](../solutions/face_mesh.md#python) and
    [colab](https://mediapipe.page.link/face_mesh_py_colab)
*   [MediaPipe Hands](../solutions/hands.md#python) and
    [colab](https://mediapipe.page.link/hands_py_colab)
*   [MediaPipe Pose](../solutions/pose.md#python) and
    [colab](https://mediapipe.page.link/pose_py_colab)

## MediaPipe on the Web

MediaPipe on the Web is an effort to run the same ML solutions built for mobile

@@ -254,6 +254,99 @@ and for iOS modify `kNumFaces` in
Tip: Maximum number of faces to detect/process is set to 1 by default. To change
it, in the graph file modify the option of `ConstantSidePacketCalculator`.

#### Python

MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/face_mesh_py_colab). If you do need to build
the Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).

Activate a Python virtual environment:

```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```

Install MediaPipe Python package:

```bash
(mp_env)$ pip install mediapipe
```

Run the following Python code:

<!-- Do not change the example code below directly. Change the corresponding example in mediapipe/python/solutions/face_mesh.py and copy it over. -->

```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh

# For static images (file_list is assumed to be a list of image file paths):
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    min_detection_confidence=0.5)
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
for idx, file in enumerate(file_list):
  image = cv2.imread(file)
  # Convert the BGR image to RGB before processing.
  results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

  # Print and draw face mesh landmarks on the image.
  if not results.multi_face_landmarks:
    continue
  annotated_image = image.copy()
  for face_landmarks in results.multi_face_landmarks:
    print('face_landmarks:', face_landmarks)
    mp_drawing.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks,
        connections=mp_face_mesh.FACE_CONNECTIONS,
        landmark_drawing_spec=drawing_spec,
        connection_drawing_spec=drawing_spec)
  cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
face_mesh.close()

# For webcam input:
face_mesh = mp_face_mesh.FaceMesh(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
while cap.isOpened():
  success, image = cap.read()
  if not success:
    break

  # Flip the image horizontally for a later selfie-view display, and convert
  # the BGR image to RGB.
  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  # To improve performance, optionally mark the image as not writeable to
  # pass by reference.
  image.flags.writeable = False
  results = face_mesh.process(image)

  # Draw the face mesh annotations on the image.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
      mp_drawing.draw_landmarks(
          image=image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACE_CONNECTIONS,
          landmark_drawing_spec=drawing_spec,
          connection_drawing_spec=drawing_spec)
  cv2.imshow('MediaPipe FaceMesh', image)
  if cv2.waitKey(5) & 0xFF == 27:
    break
face_mesh.close()
cap.release()
```

### Face Effect Example

Face effect example showcases real-time mobile face effect application use case

@@ -55,13 +55,21 @@ frame, and only when the landmark model could no longer identify hand presence
is palm detection invoked to relocalize the hand.
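
As a rough illustration of that strategy, consider the toy sketch below; the detector and tracker here are stand-ins, not MediaPipe's actual models or API:

```python
# Toy sketch of "detect once, then track": the expensive palm detector runs
# only when the cheap landmark tracker loses the hand.
def detect_palm(frame):
  # Stand-in for the palm detection model; returns a region of interest.
  return (0.25, 0.25, 0.75, 0.75)

def track_landmarks(frame, roi):
  # Stand-in for the hand landmark model; returns (landmarks, hand_present).
  return ([roi], True)

roi = None
for frame in range(100):  # stand-in for a stream of video frames
  if roi is None:
    roi = detect_palm(frame)  # invoked only when tracking has been lost
  landmarks, hand_present = track_landmarks(frame, roi)
  roi = landmarks[0] if hand_present else None  # reuse the ROI next frame
```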

The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt),
which internally utilizes a
[palm/hand detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt),
a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt)
and a
[renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt).
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
that uses a
[hand landmark tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)
from the
[hand landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark),
and renders using a dedicated
[hand renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_gpu.pbtxt).
The
[hand landmark tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)
internally uses a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt)
from the same module and a
[palm detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt)
from the
[palm detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection).

Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how

@@ -146,34 +154,11 @@ to visualize its associated subgraphs, please see
*   iOS target:
    [`mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/BUILD)

#### With Multi-hand Support

*   Graph:
    [`mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
*   Android target:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1Wk6V9EVaz1ks_MInPqqVGvvJD01SGXDc)
    [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu:multihandtrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD)
*   iOS target:
    [`mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu/BUILD)

There are two key differences between this graph and that in the
[main example](#main-example) (which handles only one hand):

1.  There is a `NormalizedRectVectorHasMinSize` calculator that checks if the
    input vector of `NormalizedRect` objects has a minimum size equal to `N`. In
    this graph, if the vector contains fewer than `N` objects, the
    `MultiHandDetection` subgraph runs. Otherwise, the `GateCalculator` doesn't
    send any image packets to the `MultiHandDetection` subgraph. This way, the
    main graph is efficient in that it avoids running the costly hand detection
    step when there are already `N` hands in the frame.
2.  The `MergeCalculator` has been replaced by the `AssociationNormRect`
    calculator. This `AssociationNormRect` takes as input a vector of
    `NormalizedRect` objects from the `MultiHandDetection` subgraph on the
    current frame, and a vector of `NormalizedRect` objects from the
    `MultiHandLandmark` subgraph from the previous frame, and performs an
    association operation between these objects. This calculator ensures that
    the output vector doesn't contain overlapping regions based on the specified
    `min_similarity_threshold`, as sketched below.
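
For intuition, here is a minimal sketch of such an association step, assuming axis-aligned normalized rects as `(xmin, ymin, xmax, ymax)` tuples and IoU as the similarity measure; the actual calculator is configurable, so treat this only as an illustration:

```python
# Keep every rect carried over from tracking; add a newly detected rect only
# if it does not overlap an already-kept rect by more than the threshold.
def iou(a, b):
  ix = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
  iy = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
  inter = ix * iy
  union = ((a[2] - a[0]) * (a[3] - a[1]) +
           (b[2] - b[0]) * (b[3] - b[1]) - inter)
  return inter / union if union > 0 else 0.0

def associate(tracked_rects, detected_rects, min_similarity_threshold=0.5):
  out = list(tracked_rects)
  for rect in detected_rects:
    if all(iou(rect, kept) <= min_similarity_threshold for kept in out):
      out.append(rect)
  return out

# The first detection overlaps the tracked hand and is dropped; the second is kept.
print(associate([(0.1, 0.1, 0.4, 0.4)],
                [(0.12, 0.1, 0.42, 0.4), (0.6, 0.6, 0.9, 0.9)]))
```
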
Tip: Maximum number of hands to detect/process is set to 2 by default. To change
it, for Android modify `NUM_HANDS` in
[MainActivity.java](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java),
and for iOS modify `kNumHands` in
[HandTrackingViewController.mm](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.mm).

#### Palm/Hand Detection Only (no landmarks)

|
|||
|
||||
### Desktop
|
||||
|
||||
#### Main Example
|
||||
|
||||
* Running on CPU
|
||||
* Graph:
|
||||
[`mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt)
|
||||
|
@@ -196,22 +179,101 @@ There are two key differences between this graph and that in the
        [`mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking/BUILD)
*   Running on GPU
    *   Graph:
        [`mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
        [`mediapipe/graphs/hand_tracking/hand_tracking_desktop_live_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_desktop_gpu.pbtxt)
    *   Target:
        [`mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking/BUILD)

#### With Multi-hand Support
Tip: Maximum number of hands to detect/process is set to 2 by default. To change
it, in the graph file modify the option of `ConstantSidePacketCalculator`.

*   Running on CPU
    *   Graph:
        [`mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live)
    *   Target:
        [`mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking/BUILD)
*   Running on GPU
    *   Graph:
        [`mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
    *   Target:
        [`mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking/BUILD)

### Python

MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/hands_py_colab). If you do need to build the
Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).

Activate a Python virtual environment:

```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```

Install MediaPipe Python package:

```bash
(mp_env)$ pip install mediapipe
```

Run the following Python code:

<!-- Do not change the example code below directly. Change the corresponding example in mediapipe/python/solutions/hands.py and copy it over. -->

```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# For static images (file_list is assumed to be a list of image file paths):
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.7)
for idx, file in enumerate(file_list):
  # Read an image, flip it around y-axis for correct handedness output (see
  # above).
  image = cv2.flip(cv2.imread(file), 1)
  # Convert the BGR image to RGB before processing.
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

  # Print handedness and draw hand landmarks on the image.
  print('handedness:', results.multi_handedness)
  if not results.multi_hand_landmarks:
    continue
  annotated_image = image.copy()
  for hand_landmarks in results.multi_hand_landmarks:
    print('hand_landmarks:', hand_landmarks)
    mp_drawing.draw_landmarks(
        annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
  cv2.imwrite(
      '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
hands.close()

# For webcam input:
hands = mp_hands.Hands(
    min_detection_confidence=0.7, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)
while cap.isOpened():
  success, image = cap.read()
  if not success:
    break

  # Flip the image horizontally for a later selfie-view display, and convert
  # the BGR image to RGB.
  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  # To improve performance, optionally mark the image as not writeable to
  # pass by reference.
  image.flags.writeable = False
  results = hands.process(image)

  # Draw the hand annotations on the image.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  if results.multi_hand_landmarks:
    for hand_landmarks in results.multi_hand_landmarks:
      mp_drawing.draw_landmarks(
          image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
  cv2.imshow('MediaPipe Hands', image)
  if cv2.waitKey(5) & 0xFF == 27:
    break
hands.close()
cap.release()
```

Tip: Use command `deactivate` to exit the Python virtual environment.

### Web

@@ -37,10 +37,10 @@ nav_order: 30
### [Hands](https://google.github.io/mediapipe/solutions/hands)

*   Palm detection model:
    [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite),
    [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
    [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
*   Hand landmark model:
    [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite),
    [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark.tflite),
    [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
*   [Model card](https://mediapipe.page.link/handmc)

@@ -68,6 +68,11 @@ nav_order: 30

*   [TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_sneakers.tflite)
*   [TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_chair.tflite)
*   [TFLite model for cameras](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_camera.tflite)
*   [TFLite model for cups](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_cup.tflite)
*   [Single-stage TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_sneakers_1stage.tflite)
*   [Single-stage TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_chair_1stage.tflite)
*   [Model card](https://mediapipe.page.link/objectron-mc)

### [KNIFT](https://google.github.io/mediapipe/solutions/knift)

|
@ -15,13 +15,12 @@ nav_order: 10
|
|||
## Overview
|
||||
|
||||
MediaPipe Objectron is a mobile real-time 3D object detection solution for
|
||||
everyday objects. It detects objects in 2D images, and estimates their poses and
|
||||
sizes through a machine learning (ML) model, trained on a newly created 3D
|
||||
dataset.
|
||||
everyday objects. It detects objects in 2D images, and estimates their poses
|
||||
through a machine learning (ML) model, trained on a newly created 3D dataset.
|
||||
|
||||
![objectron_shoe_android_gpu.gif](../images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu.gif](../images/mobile/objectron_chair_android_gpu.gif)
|
||||
:--------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------:
|
||||
*Fig 1(a). Objectron for Shoes.* | *Fig 1(b). Objectron for Chairs.*
|
||||
![objectron_shoe_android_gpu.gif](../images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu.gif](../images/mobile/objectron_chair_android_gpu.gif) | ![objectron_camera_android_gpu.gif](../images/mobile/objectron_camera_android_gpu.gif) | ![objectron_cup_android_gpu.gif](../images/mobile/objectron_cup_android_gpu.gif)
|
||||
:--------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------:
|
||||
*Fig 1(a). Objectron for Shoes.* | *Fig 1(b). Objectron for Chairs.* | *Fig 1(c). Objectron for Cameras.* | *Fig 1(d). Objectron for Cups.*
|
||||
|
||||
Object detection is an extensively studied computer vision problem, but most of
|
||||
the research has focused on
|
||||
|
@@ -85,15 +84,41 @@ able to increase the accuracy by about 10%.
:-------------------------------------------------------------------------------------------: |
*Fig 4. An example of AR synthetic data generation. The virtual white-brown cereal box is rendered into the real scene, next to the real blue book.* |

## ML Model for 3D Object Detection
## ML Pipelines for 3D Object Detection

We built two ML pipelines to predict the 3D bounding box of an object from a
single RGB image: one is a two-stage pipeline and the other is a single-stage
pipeline. The two-stage pipeline is 3x faster than the single-stage pipeline,
with similar or better accuracy. The single-stage pipeline is good at detecting
multiple objects, whereas the two-stage pipeline is good for a single dominant
object.

### Two-stage Pipeline

Our two-stage pipeline is illustrated by the diagram in Fig 5. The first stage
uses an object detector to find the 2D crop of the object. The second stage
takes the image crop and estimates the 3D bounding box. At the same time, it
also computes the 2D crop of the object for the next frame, such that the object
detector does not need to run every frame.
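
As a rough sketch of that flow, consider the toy loop below; the two "models" here are stand-ins, not the released Objectron networks:

```python
# Toy sketch of the two-stage flow: the 2D detector runs only when no crop was
# carried over from the previous frame.
def detect_2d(frame):
  # Stand-in for the first-stage 2D object detector; returns a crop.
  return (0.2, 0.2, 0.8, 0.8)

def estimate_3d(frame, crop):
  # Stand-in for the second stage; besides the 3D box, it also predicts the
  # crop to use on the next frame.
  return 'box for ' + str(crop), crop

crop = None
for frame in range(100):  # stand-in for a stream of video frames
  if crop is None:
    crop = detect_2d(frame)  # run the detector only when needed
  box_3d, crop = estimate_3d(frame, crop)
```
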
![objectron_network_architecture.png](../images/objectron_2stage_network_architecture.png) |
:----------------------------------------------------------------------------------------: |
*Fig 5. Network architecture and post-processing for two-stage 3D object detection.* |

We can use any 2D object detector for the first stage. In this solution, we use
[TensorFlow Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection).
The second stage 3D bounding box predictor we released runs at 83 FPS on an
Adreno 650 mobile GPU.

### Single-stage Pipeline

![objectron_network_architecture.png](../images/objectron_network_architecture.png) |
:---------------------------------------------------------------------------------: |
*Fig 5. Network architecture and post-processing for 3D object detection.* |
*Fig 6. Network architecture and post-processing for single-stage 3D object detection.* |

We [built a single-stage model](https://arxiv.org/abs/2003.03522) to predict the
pose and physical size of an object from a single RGB image. The model backbone
has an encoder-decoder architecture, built upon
Our [single-stage pipeline](https://arxiv.org/abs/2003.03522) is illustrated by
the diagram in Fig 6; the model backbone has an encoder-decoder architecture,
built upon
[MobileNetv2](https://ai.googleblog.com/2018/04/mobilenetv2-next-generation-of-on.html).
We employ a multi-task learning approach, jointly predicting an object's shape
with detection and regression. The shape task predicts the object's shape

@@ -114,9 +139,9 @@ size of the object. The model is light enough to run real-time on mobile devices

![objectron_sample_network_results.png](../images/objectron_sample_network_results.png) |
:-------------------------------------------------------------------------------------: |
*Fig 6. Sample results of our network — (Left) original 2D image with estimated bounding boxes, (Middle) object detection by Gaussian distribution, (Right) predicted segmentation mask.* |
*Fig 7. Sample results of our network — (Left) original 2D image with estimated bounding boxes, (Middle) object detection by Gaussian distribution, (Right) predicted segmentation mask.* |

## Detection and Tracking Pipeline
#### Detection and Tracking

When the model is applied to every frame captured by the mobile device, it can
suffer from jitter due to the ambiguity of the 3D bounding box estimated in each

|
|||
|
||||
The Objectron 3D object detection and tracking pipeline is implemented as a
|
||||
MediaPipe
|
||||
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt),
|
||||
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt),
|
||||
which internally uses a
|
||||
[detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/subgraphs/objectron_detection_gpu.pbtxt)
|
||||
and a
|
||||
|
@ -147,6 +172,12 @@ new detection becomes available from the detection subgraph, the tracking
|
|||
subgraph is also responsible for consolidation between the detection and
|
||||
tracking results, based on the area of overlap.
|
||||
|
||||
## Objectron Dataset
|
||||
|
||||
We also released our [Objectron dataset](http://objectron.dev), with which we
|
||||
trained our 3D object detection models. The technical details of the Objectron
|
||||
dataset, including usage and tutorials, are available on the dataset website.
|
||||
|
||||
## Example Apps
|
||||
|
||||
Please first see general instructions for
|
||||
|
@@ -158,32 +189,72 @@ Note: To visualize a graph, copy the graph and paste it into
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).

### Objectron for Shoes
### Two-stage Objectron

*   Graph:
    [`mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
*   Android target:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1S0K4hbWt3o31FfQ4QU3Rz7IHrvOUMx1d)
    [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
*   iOS target: Not available
    [`mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt)

### Objectron for Chairs

*   Graph:
    [`mediapipe/graphs/object_detection_3d/chair_classic_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/chair_classic_occlusion_tracking.pbtxt)
*   Android target:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1MM8K-13bXLCVS1EHQ-KgkVyEahEPrKej)
    [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
    and add `--define chair=true` to the build command, i.e.,
    [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD).

    Build for **shoes** (default) with:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1ANW9WDOCb8QO1r8gDC03A4UgrPkICdPP/view?usp=sharing)

    ```bash
    bazel build -c opt --config android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
    ```

    Build for **chairs** with:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1lcUv1TBnv_SxnKSQwdOqbdLa9mkaTJHy/view?usp=sharing)

    ```bash
    bazel build -c opt --config android_arm64 --define chair=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
    ```

    Build for **cups** with:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1bf77KDkowwrduleiC9B1M1XnEhjnOQbX/view?usp=sharing)

    ```bash
    bazel build -c opt --config android_arm64 --define cup=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
    ```

    Build for **cameras** with:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1GM7lPO-s5URVxIzQur1bLsionEJs3yIl/view?usp=sharing)

    ```bash
    bazel build -c opt --config android_arm64 --define camera=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
    ```

*   iOS target: Not available

### Single-stage Objectron

*   Graph:
    [`mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt)

*   Android target:
    [`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD).

    Build with **single-stage** model for **shoes** with:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1MvaEg4dkvKN8jAU1Z2GtudyXi1rQHYsE/view?usp=sharing)

    ```bash
    bazel build -c opt --config android_arm64 --define shoe_1stage=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
    ```

    Build with **single-stage** model for **chairs** with:
    [(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1GJL4z3jr-wD1jMHGd4NBfOG-Yoq5t167/view?usp=sharing)

    ```bash
    bazel build -c opt --config android_arm64 --define chair_1stage=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
    ```

*   iOS target: Not available

## Resources

*   Google AI Blog:
    [Announcing the Objectron Dataset](https://mediapipe.page.link/objectron_dataset_ai_blog)
*   Google AI Blog:
    [Real-Time 3D Object Detection on Mobile Devices with MediaPipe](https://ai.googleblog.com/2020/03/real-time-3d-object-detection-on-mobile.html)
*   Paper: [MobilePose: Real-Time Pose Estimation for Unseen Objects with Weak

@@ -5,7 +5,7 @@ parent: Solutions
nav_order: 5
---

# MediaPipe BlazePose
# MediaPipe Pose
{: .no_toc }

1. TOC

@@ -88,12 +88,11 @@ hip midpoints.
### Pose Landmark Model (BlazePose Tracker)

The landmark model currently included in MediaPipe Pose predicts the location of
25 upper-body landmarks (see figure below), each with `(x, y, z, visibility)`,
plus two virtual alignment keypoints. Note that the `z` value should be
discarded as the model is currently not fully trained to predict depth, but this
is something we have on the roadmap. The model shares the same architecture as
the full-body version that predicts 33 landmarks, described in more detail in
the
25 upper-body landmarks (see figure below), each with `(x, y, z, visibility)`.
Note that the `z` value should be discarded as the model is currently not fully
trained to predict depth, but this is something we have on the roadmap. The
model shares the same architecture as the full-body version that predicts 33
landmarks, described in more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
and in this [paper](https://arxiv.org/abs/2006.10204).
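
For reference, reading those per-landmark fields in Python looks like the sketch below, assuming `results` comes from `mp_pose.Pose().process(...)` as in the example later on this page:

```python
# Each entry carries x, y, z and visibility; x and y are normalized to
# [0.0, 1.0] by image width and height, and z should be discarded for now,
# as noted above.
for landmark in results.pose_landmarks.landmark:
  print(landmark.x, landmark.y, landmark.z, landmark.visibility)
```
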
@@ -147,35 +146,77 @@ MediaPipe examples.
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/mp-py-colab). If you do need to build the
[colab](https://mediapipe.page.link/pose_py_colab). If you do need to build the
Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).

Activate a Python virtual environment:

```bash
# Activate a Python virtual environment.
$ python3 -m venv mp_env && source mp_env/bin/activate
```

# Install MediaPipe Python package
Install MediaPipe Python package:

```bash
(mp_env)$ pip install mediapipe
```

# Run in Python interpreter
(mp_env)$ python3
>>> import mediapipe as mp
>>> pose_tracker = mp.examples.UpperBodyPoseTracker()
Run the following Python code:

# For image input
>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
# To print out the pose landmarks, you can simply do "print(pose_landmarks)".
# However, the data points can be more accessible with the following approach.
>>> [print('x is', data_point.x, 'y is', data_point.y, 'z is', data_point.z, 'visibility is', data_point.visibility) for data_point in pose_landmarks.landmark]
<!-- Do not change the example code below directly. Change the corresponding example in mediapipe/python/solutions/pose.py and copy it over. -->

# For live camera input
# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.)
>>> pose_tracker.run_live()
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Close the tracker.
>>> pose_tracker.close()
# For static images (file_list is assumed to be a list of image file paths):
pose = mp_pose.Pose(
    static_image_mode=True, min_detection_confidence=0.5)
for idx, file in enumerate(file_list):
  image = cv2.imread(file)
  # Convert the BGR image to RGB before processing.
  results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

  # Print and draw pose landmarks on the image.
  print(
      'nose landmark:',
      results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE])
  annotated_image = image.copy()
  mp_drawing.draw_landmarks(
      annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
  cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
pose.close()

# For webcam input:
pose = mp_pose.Pose(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)
while cap.isOpened():
  success, image = cap.read()
  if not success:
    break

  # Flip the image horizontally for a later selfie-view display, and convert
  # the BGR image to RGB.
  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  # To improve performance, optionally mark the image as not writeable to
  # pass by reference.
  image.flags.writeable = False
  results = pose.process(image)

  # Draw the pose annotation on the image.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  mp_drawing.draw_landmarks(
      image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
  cv2.imshow('MediaPipe Pose', image)
  if cv2.waitKey(5) & 0xFF == 27:
    break
pose.close()
cap.release()
```

Tip: Use command `deactivate` to exit the Python virtual environment.

@@ -19,9 +19,9 @@ has_toc: false
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | ✅ | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅

@@ -15,7 +15,6 @@
        "mediapipe/examples/ios/handdetectiongpu/BUILD",
        "mediapipe/examples/ios/handtrackinggpu/BUILD",
        "mediapipe/examples/ios/iristrackinggpu/BUILD",
        "mediapipe/examples/ios/multihandtrackinggpu/BUILD",
        "mediapipe/examples/ios/objectdetectioncpu/BUILD",
        "mediapipe/examples/ios/objectdetectiongpu/BUILD",
        "mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD"

@@ -29,7 +28,6 @@
        "//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp",
        "//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp",
        "//mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp",
        "//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp",
        "//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp",
        "//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp",
        "//mediapipe/examples/ios/upperbodyposetrackinggpu:UpperBodyPoseTrackingGpuApp",

@@ -97,7 +95,6 @@
        "mediapipe/examples/ios/handdetectiongpu",
        "mediapipe/examples/ios/handtrackinggpu",
        "mediapipe/examples/ios/iristrackinggpu",
        "mediapipe/examples/ios/multihandtrackinggpu",
        "mediapipe/examples/ios/objectdetectioncpu",
        "mediapipe/examples/ios/objectdetectiongpu",
        "mediapipe/examples/ios/upperbodyposetrackinggpu",

@@ -18,7 +18,6 @@
    "mediapipe/examples/ios/handdetectiongpu",
    "mediapipe/examples/ios/handtrackinggpu",
    "mediapipe/examples/ios/iristrackinggpu",
    "mediapipe/examples/ios/multihandtrackinggpu",
    "mediapipe/examples/ios/objectdetectioncpu",
    "mediapipe/examples/ios/objectdetectiongpu",
    "mediapipe/examples/ios/upperbodyposetrackinggpu"

@@ -116,6 +116,7 @@ mediapipe_proto_library(
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/framework/formats:classification_proto",
    ],
)

@@ -240,6 +241,7 @@ cc_library(
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:classification_cc_proto",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",

@@ -800,14 +802,23 @@ cc_library(
    name = "split_vector_calculator",
    srcs = ["split_vector_calculator.cc"],
    hdrs = ["split_vector_calculator.h"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":split_vector_calculator_cc_proto",
        "//mediapipe/framework/formats:detection_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:classification_cc_proto",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/formats:matrix",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/util:resource_util",

@@ -1069,6 +1080,7 @@ cc_library(
        ":constant_side_packet_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:collection_item_id",
        "//mediapipe/framework/formats:classification_cc_proto",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message ClipVectorSizeCalculatorOptions {
  extend CalculatorOptions {
    optional ClipVectorSizeCalculatorOptions ext = 274674998;

@@ -18,6 +18,7 @@

#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/integral_types.h"
#include "tensorflow/lite/interpreter.h"

@@ -64,6 +65,9 @@ typedef ConcatenateVectorCalculator<TfLiteTensor>
    ConcatenateTfLiteTensorVectorCalculator;
REGISTER_CALCULATOR(ConcatenateTfLiteTensorVectorCalculator);

typedef ConcatenateVectorCalculator<Tensor> ConcatenateTensorVectorCalculator;
REGISTER_CALCULATOR(ConcatenateTensorVectorCalculator);

typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
    ConcatenateLandmarkVectorCalculator;
REGISTER_CALCULATOR(ConcatenateLandmarkVectorCalculator);

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message ConcatenateVectorCalculatorOptions {
  extend CalculatorOptions {
    optional ConcatenateVectorCalculatorOptions ext = 259397839;

@@ -17,6 +17,7 @@
#include "mediapipe/calculators/core/constant_side_packet_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/ret_check.h"

@@ -24,6 +25,8 @@

namespace mediapipe {

namespace {}  // namespace

// Generates an output side packet or multiple output side packets according to
// the specified options.
//

@@ -74,6 +77,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
        packet.Set<std::string>();
      } else if (packet_options.has_uint64_value()) {
        packet.Set<uint64>();
      } else if (packet_options.has_classification_list_value()) {
        packet.Set<ClassificationList>();
      } else {
        return ::mediapipe::InvalidArgumentError(
            "None of supported values were specified in options.");

@@ -100,6 +105,9 @@ class ConstantSidePacketCalculator : public CalculatorBase {
        packet.Set(MakePacket<std::string>(packet_options.string_value()));
      } else if (packet_options.has_uint64_value()) {
        packet.Set(MakePacket<uint64>(packet_options.uint64_value()));
      } else if (packet_options.has_classification_list_value()) {
        packet.Set(MakePacket<ClassificationList>(
            packet_options.classification_list_value()));
      } else {
        return ::mediapipe::InvalidArgumentError(
            "None of supported values were specified in options.");

@@ -17,6 +17,9 @@ syntax = "proto2";
package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/framework/formats/classification.proto";

option objc_class_prefix = "MediaPipe";

message ConstantSidePacketCalculatorOptions {
  extend CalculatorOptions {

@@ -30,6 +33,7 @@ message ConstantSidePacketCalculatorOptions {
    bool bool_value = 3;
    string string_value = 4;
    uint64 uint64_value = 5;
    ClassificationList classification_list_value = 6;
  }
}

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message DequantizeByteArrayCalculatorOptions {
  extend CalculatorOptions {
    optional DequantizeByteArrayCalculatorOptions ext = 272316343;

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message GateCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional GateCalculatorOptions ext = 261754847;

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message PacketClonerCalculatorOptions {
  extend CalculatorOptions {
    optional PacketClonerCalculatorOptions ext = 258872085;

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message PacketResamplerCalculatorOptions {
  extend CalculatorOptions {
    optional PacketResamplerCalculatorOptions ext = 95743844;

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message PacketThinnerCalculatorOptions {
  extend CalculatorOptions {
    optional PacketThinnerCalculatorOptions ext = 288533508;

@@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message QuantizeFloatVectorCalculatorOptions {
  extend CalculatorOptions {
    optional QuantizeFloatVectorCalculatorOptions ext = 259848061;

@ -32,6 +32,9 @@ class SequenceShiftCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).SetAny();
    if (cc->InputSidePackets().HasTag(kPacketOffsetTag)) {
      cc->InputSidePackets().Tag(kPacketOffsetTag).Set<int>();
    }
    cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
    return ::mediapipe::OkStatus();
  }

@ -41,6 +44,8 @@ class SequenceShiftCalculator : public CalculatorBase {
  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  static constexpr const char* kPacketOffsetTag = "PACKET_OFFSET";

  // A positive offset means we want a packet to be output with the timestamp
  // of a later packet. Stores packets waiting for their output timestamps and
  // outputs a single packet when the cache fills.

@ -70,6 +75,9 @@ REGISTER_CALCULATOR(SequenceShiftCalculator);
::mediapipe::Status SequenceShiftCalculator::Open(CalculatorContext* cc) {
  packet_offset_ =
      cc->Options<mediapipe::SequenceShiftCalculatorOptions>().packet_offset();
  if (cc->InputSidePackets().HasTag(kPacketOffsetTag)) {
    packet_offset_ = cc->InputSidePackets().Tag(kPacketOffsetTag).Get<int>();
  }
  cache_size_ = abs(packet_offset_);
  // An offset of zero is a no-op, but someone might still request it.
  if (packet_offset_ == 0) {
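As a usage sketch of mine (not part of the diff): the new PACKET_OFFSET side packet lets the offset be chosen at graph-run time rather than baked into the options. The node wiring assumed here mirrors the test further below.

// Illustrative only: a SequenceShiftCalculator node whose offset comes from a
// side packet; with packet_offset = -2, input[i] is re-emitted with the
// timestamp of input[i - 2] and the first two packets are dropped.
CalculatorGraphConfig::Node node =
    ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
      calculator: "SequenceShiftCalculator"
      input_stream: "input"
      output_stream: "output"
      input_side_packet: "PACKET_OFFSET:packet_offset"
    )");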
@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

message SequenceShiftCalculatorOptions {
  extend CalculatorOptions {
    optional SequenceShiftCalculatorOptions ext = 107633927;
@ -99,6 +99,35 @@ TEST(SequenceShiftCalculatorTest, NegativeShift) {
  }
}

// Tests using a side packet to specify the offset. Shifting by -2, i.e.,
// output input[i] with timestamp[i - 2]. The first two packets should be
// dropped.
TEST(SequenceShiftCalculatorTest, SidePacketOffset) {
  CalculatorGraphConfig::Node node;
  node.set_calculator("SequenceShiftCalculator");
  node.add_input_stream("input");
  node.add_output_stream("output");
  node.add_input_side_packet("PACKET_OFFSET:packet_offset");

  CalculatorRunner runner(node);
  AddPackets(&runner);
  runner.MutableSidePackets()->Tag("PACKET_OFFSET") = Adopt(new int(-2));
  MP_ASSERT_OK(runner.Run());
  const std::vector<Packet>& input_packets =
      runner.MutableInputs()->Index(0).packets;
  const std::vector<Packet>& output_packets = runner.Outputs().Index(0).packets;
  ASSERT_EQ(10, input_packets.size());
  // Input packet[i] should be output with the timestamp of input packet[i - 2].
  // The first two packets are dropped. This means timestamps match between
  // input and output packets, but the data in the output packets come from
  // input_packets[i + 2].
  ASSERT_EQ(8, output_packets.size());
  for (int i = 0; i < output_packets.size(); ++i) {
    EXPECT_EQ(input_packets[i].Timestamp(), output_packets[i].Timestamp());
    EXPECT_EQ(input_packets[i + 2].Get<int>(), output_packets[i].Get<int>());
  }
}

}  // namespace

}  // namespace mediapipe
@ -16,10 +16,12 @@

#include <vector>

#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "tensorflow/lite/interpreter.h"

#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)

@ -46,15 +48,18 @@ typedef SplitVectorCalculator<TfLiteTensor, false>
    SplitTfLiteTensorVectorCalculator;
REGISTER_CALCULATOR(SplitTfLiteTensorVectorCalculator);

typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark, false>
typedef SplitVectorCalculator<Tensor, true> SplitTensorVectorCalculator;
REGISTER_CALCULATOR(SplitTensorVectorCalculator);

typedef SplitVectorCalculator<mediapipe::NormalizedLandmark, false>
    SplitLandmarkVectorCalculator;
REGISTER_CALCULATOR(SplitLandmarkVectorCalculator);

typedef SplitVectorCalculator<::mediapipe::NormalizedLandmarkList, false>
typedef SplitVectorCalculator<mediapipe::NormalizedLandmarkList, false>
    SplitNormalizedLandmarkListVectorCalculator;
REGISTER_CALCULATOR(SplitNormalizedLandmarkListVectorCalculator);

typedef SplitVectorCalculator<::mediapipe::NormalizedRect, false>
typedef SplitVectorCalculator<mediapipe::NormalizedRect, false>
    SplitNormalizedRectVectorCalculator;
REGISTER_CALCULATOR(SplitNormalizedRectVectorCalculator);

@ -67,8 +72,12 @@ typedef SplitVectorCalculator<::tflite::gpu::gl::GlBuffer, true>
REGISTER_CALCULATOR(MovableSplitGlBufferVectorCalculator);
#endif

typedef SplitVectorCalculator<::mediapipe::Detection, false>
typedef SplitVectorCalculator<mediapipe::Detection, false>
    SplitDetectionVectorCalculator;
REGISTER_CALCULATOR(SplitDetectionVectorCalculator);

typedef SplitVectorCalculator<mediapipe::ClassificationList, false>
    SplitClassificationListVectorCalculator;
REGISTER_CALCULATOR(SplitClassificationListVectorCalculator);

}  // namespace mediapipe
@ -18,6 +18,8 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";

option objc_class_prefix = "MediaPipe";

// A Range {begin, end} specifies beginning and ending indices to splice a
// vector. A vector v is spliced to have elements v[begin:(end-1)], i.e., with
// begin index inclusive and end index exclusive.
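A small hedged example of the Range semantics just described, written by me for illustration rather than taken from the diff: two ranges splitting an 8-element vector, with end exclusive, one output stream per range.

// Illustrative only: splits an 8-element input vector into v[0:3] and v[4:7].
CalculatorGraphConfig::Node split_node =
    ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
      calculator: "SplitNormalizedLandmarkListVectorCalculator"
      input_stream: "landmark_lists"
      output_stream: "first_half"
      output_stream: "second_half"
      options {
        [mediapipe.SplitVectorCalculatorOptions.ext] {
          ranges { begin: 0 end: 4 }
          ranges { begin: 4 end: 8 }
        }
      }
    )");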
@ -107,7 +107,7 @@ class BilateralFilterCalculator : public CalculatorBase {
  GLuint program_ = 0;
  GLuint vao_;
  GLuint vbo_[2];  // vertex storage
#endif  // !MEDIAPIPE_DISABLE_GPU
#endif  // !MEDIAPIPE_DISABLE_GPU
};
REGISTER_CALCULATOR(BilateralFilterCalculator);
@ -519,7 +519,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
      renderer = yuv_renderer_.get();
      src1 = gpu_helper_.CreateSourceTexture(input, 0);
    } else  // NOLINT(readability/braces)
#endif  // iOS
#endif  // iOS
    {
      src1 = gpu_helper_.CreateSourceTexture(input);
#if defined(TEXTURE_EXTERNAL_OES)

@ -531,7 +531,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
      }
      renderer = ext_rgb_renderer_.get();
    } else  // NOLINT(readability/braces)
#endif  // TEXTURE_EXTERNAL_OES
#endif  // TEXTURE_EXTERNAL_OES
    {
      if (!rgb_renderer_) {
        rgb_renderer_ = absl::make_unique<QuadRenderer>();
mediapipe/calculators/tensor/BUILD (new file)
@ -0,0 +1,631 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

load("@bazel_skylib//lib:selects.bzl", "selects")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")

licenses(["notice"])

package(default_visibility = ["//visibility:private"])

selects.config_setting_group(
    name = "compute_shader_unavailable",
    match_any = [
        "//mediapipe/gpu:disable_gpu",
    ],
)

mediapipe_proto_library(
    name = "inference_calculator_proto",
    srcs = ["inference_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_library(
    name = "inference_calculator",
    srcs = ["inference_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    features = ["-layering_check"],  # allow depending on inference_calculator_gpu_deps
    linkopts = select({
        "//mediapipe:apple": [
            "-framework CoreVideo",
            "-framework MetalKit",
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":inference_calculator_cc_proto",
        "@com_google_absl//absl/memory",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/util:resource_util",
        "//mediapipe/util/tflite:config",
        "@org_tensorflow//tensorflow/lite:framework",
        "@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
        "@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
        "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
        "//mediapipe/framework/port:ret_check",
    ] + select({
        ":compute_shader_unavailable": [],
        "//conditions:default": [":inference_calculator_gpu_deps"],
    }) + select({
        "//conditions:default": [],
        "//mediapipe:android": [
            "//mediapipe/util/android/file/base",
            "@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate",
        ],
    }) + select({
        "//conditions:default": [
            "//mediapipe/util:cpu_util",
        ],
    }),
    alwayslink = 1,
)

cc_library(
    name = "inference_calculator_gpu_deps",
    deps = selects.with_or({
        "//mediapipe:ios": [
            "//mediapipe/gpu:MPPMetalHelper",
            "//mediapipe/gpu:MPPMetalUtil",
            "//mediapipe/gpu:gpu_buffer",
            "//mediapipe/objc:mediapipe_framework_ios",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/metal:buffer_convert",
            "@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate",
            "@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate_internal",
        ],
        "//mediapipe:macos": [],
        "//conditions:default": [
            "//mediapipe/util/tflite:tflite_gpu_runner",
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
            "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
        ],
    }),
)

mediapipe_proto_library(
    name = "tensor_converter_calculator_proto",
    srcs = ["tensor_converter_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_library(
    name = "tensor_converter_calculator",
    srcs = ["tensor_converter_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    features = ["-layering_check"],  # allow depending on tensor_converter_calculator_gpu_deps
    linkopts = select({
        "//mediapipe:apple": [
            "-framework CoreVideo",
            "-framework MetalKit",
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":tensor_converter_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:matrix",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework:port",
        "//mediapipe/util:resource_util",
    ] + select({
        "//mediapipe/gpu:disable_gpu": [],
        "//conditions:default": ["tensor_converter_calculator_gpu_deps"],
    }),
    alwayslink = 1,
)

cc_library(
    name = "tensor_converter_calculator_gpu_deps",
    deps = select({
        "//mediapipe:android": [
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
        ],
        "//mediapipe:ios": [
            "//mediapipe/gpu:MPPMetalUtil",
            "//mediapipe/gpu:MPPMetalHelper",
            "//mediapipe/objc:mediapipe_framework_ios",
        ],
        "//mediapipe:macos": [],
        "//conditions:default": [
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gl_simple_shaders",
            "//mediapipe/gpu:shader_util",
            "//mediapipe/gpu:gpu_buffer",
        ],
    }),
)

cc_test(
    name = "tensor_converter_calculator_test",
    srcs = ["tensor_converter_calculator_test.cc"],
    deps = [
        ":tensor_converter_calculator",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/formats:matrix",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/framework/tool:validate_type",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
    ],
)

mediapipe_proto_library(
    name = "tensors_to_detections_calculator_proto",
    srcs = ["tensors_to_detections_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_library(
    name = "tensors_to_detections_calculator",
    srcs = ["tensors_to_detections_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    features = ["-layering_check"],  # allow depending on tensors_to_detections_calculator_gpu_deps
    linkopts = select({
        "//mediapipe:apple": [
            "-framework CoreVideo",
            "-framework MetalKit",
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":tensors_to_detections_calculator_cc_proto",
        "//mediapipe/framework/formats:detection_cc_proto",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:port",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/formats:location",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/formats/object_detection:anchor_cc_proto",
        "//mediapipe/framework/port:ret_check",
    ] + select({
        ":compute_shader_unavailable": [],
        "//conditions:default": [":tensors_to_detections_calculator_gpu_deps"],
    }),
    alwayslink = 1,
)

cc_library(
    name = "tensors_to_detections_calculator_gpu_deps",
    deps = select({
        "//mediapipe:ios": [
            "//mediapipe/gpu:MPPMetalUtil",
            "//mediapipe/gpu:MPPMetalHelper",
        ],
        "//mediapipe:macos": [],
        "//conditions:default": [
            "//mediapipe/gpu:gl_calculator_helper",
        ],
    }),
)

mediapipe_proto_library(
    name = "tensors_to_landmarks_calculator_proto",
    srcs = ["tensors_to_landmarks_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_library(
    name = "tensors_to_landmarks_calculator",
    srcs = ["tensors_to_landmarks_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":tensors_to_landmarks_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:ret_check",
    ],
    alwayslink = 1,
)

cc_library(
    name = "tensors_to_floats_calculator",
    srcs = ["tensors_to_floats_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:ret_check",
    ],
    alwayslink = 1,
)

cc_library(
    name = "tensors_to_classification_calculator",
    srcs = ["tensors_to_classification_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    visibility = ["//visibility:public"],
    deps = [
        ":tensors_to_classification_calculator_cc_proto",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/types:span",
        "//mediapipe/framework/formats:classification_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:location",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/util:resource_util",
    ] + select({
        "//mediapipe:android": [
            "//mediapipe/util/android/file/base",
        ],
        "//mediapipe:ios": [
            "//mediapipe/util/android/file/base",
        ],
        "//conditions:default": [
            "//mediapipe/framework/port:file_helpers",
        ],
    }),
    alwayslink = 1,
)

mediapipe_proto_library(
    name = "tensors_to_classification_calculator_proto",
    srcs = ["tensors_to_classification_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_test(
    name = "tensors_to_classification_calculator_test",
    srcs = ["tensors_to_classification_calculator_test.cc"],
    data = ["testdata/labelmap.txt"],
    deps = [
        ":tensors_to_classification_calculator",
        ":tensors_to_classification_calculator_cc_proto",
        "//mediapipe/framework:calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
        "//mediapipe/framework/formats:classification_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:parse_text_proto",
        "@com_google_absl//absl/memory",
        "@com_google_googletest//:gtest_main",
    ],
)

cc_library(
    name = "image_to_tensor_calculator",
    srcs = ["image_to_tensor_calculator.cc"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    features = ["-layering_check"],  # allow depending on image_to_tensor_calculator_gpu_deps
    visibility = ["//visibility:public"],
    deps = [
        ":image_to_tensor_calculator_cc_proto",
        ":image_to_tensor_converter",
        ":image_to_tensor_converter_opencv",
        ":image_to_tensor_utils",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:statusor",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:port",
    ] + select({
        "//mediapipe/gpu:disable_gpu": [],
        "//conditions:default": [":image_to_tensor_calculator_gpu_deps"],
    }),
    alwayslink = 1,
)

cc_library(
    name = "image_to_tensor_calculator_gpu_deps",
    deps = select({
        "//mediapipe:android": [
            ":image_to_tensor_converter_gl_buffer",
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
        ],
        "//mediapipe:apple": [
            ":image_to_tensor_converter_metal",
            "//mediapipe/gpu:MPPMetalHelper",
            "//mediapipe/gpu:gpu_buffer",
        ],
        "//conditions:default": [
            ":image_to_tensor_converter_gl_buffer",
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
        ],
    }),
)

mediapipe_proto_library(
    name = "image_to_tensor_calculator_proto",
    srcs = ["image_to_tensor_calculator.proto"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
    ],
)

cc_test(
    name = "image_to_tensor_calculator_test",
    srcs = ["image_to_tensor_calculator_test.cc"],
    data = [
        "testdata/image_to_tensor/input.jpg",
        "testdata/image_to_tensor/large_sub_rect.png",
        "testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
        "testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
        "testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
        "testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
        "testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
        "testdata/image_to_tensor/noop_except_range.png",
    ],
    deps = [
        ":image_to_tensor_calculator",
        ":image_to_tensor_utils",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgcodecs",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/framework/tool:validate_type",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/strings",
        "@org_tensorflow//tensorflow/lite:framework",
    ],
)

cc_library(
    name = "image_to_tensor_converter",
    hdrs = ["image_to_tensor_converter.h"],
    deps = [
        ":image_to_tensor_utils",
        "//mediapipe/framework:packet",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:statusor",
    ],
)

cc_library(
    name = "image_to_tensor_converter_opencv",
    srcs = ["image_to_tensor_converter_opencv.cc"],
    hdrs = ["image_to_tensor_converter_opencv.h"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    deps = [
        ":image_to_tensor_converter",
        ":image_to_tensor_utils",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
        "//mediapipe/framework/formats:tensor",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:statusor",
    ],
)

cc_library(
    name = "image_to_tensor_converter_gl_buffer",
    srcs = ["image_to_tensor_converter_gl_buffer.cc"],
    hdrs = ["image_to_tensor_converter_gl_buffer.h"],
    deps = ["//mediapipe/framework:port"] + select({
        "//mediapipe:apple": [],
        "//conditions:default": [
            ":image_to_tensor_converter",
            ":image_to_tensor_utils",
            "@com_google_absl//absl/strings",
            "//mediapipe/framework:calculator_framework",
            "//mediapipe/framework/formats:rect_cc_proto",
            "//mediapipe/framework/formats:tensor",
            "//mediapipe/framework/port:ret_check",
            "//mediapipe/framework/port:status",
            "//mediapipe/framework/port:statusor",
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gpu_buffer",
            "//mediapipe/gpu:gpu_buffer_format",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:types",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:command_queue",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_call",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:request_gpu_info",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:variable",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
        ],
    }),
)

cc_library(
    name = "image_to_tensor_converter_gl_texture",
    srcs = ["image_to_tensor_converter_gl_texture.cc"],
    hdrs = ["image_to_tensor_converter_gl_texture.h"],
    deps = ["//mediapipe/framework:port"] + select({
        "//mediapipe/gpu:disable_gpu": [],
        "//conditions:default": [
            ":image_to_tensor_converter",
            ":image_to_tensor_utils",
            "@com_google_absl//absl/strings",
            "//mediapipe/framework:calculator_framework",
            "//mediapipe/framework/formats:tensor",
            "//mediapipe/framework/port:ret_check",
            "//mediapipe/framework/port:status",
            "//mediapipe/framework/port:statusor",
            "//mediapipe/gpu:gl_calculator_helper",
            "//mediapipe/gpu:gl_simple_shaders",
            "//mediapipe/gpu:gpu_buffer",
            "//mediapipe/gpu:shader_util",
        ],
    }),
)

cc_library(
    name = "image_to_tensor_converter_metal",
    srcs = ["image_to_tensor_converter_metal.cc"],
    hdrs = ["image_to_tensor_converter_metal.h"],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
            "-fobjc-arc",  # enable reference-counting
        ],
        "//conditions:default": [],
    }),
    linkopts = select({
        "//mediapipe:apple": [
            "-framework CoreVideo",
            "-framework MetalKit",
        ],
        "//conditions:default": [],
    }),
    deps = ["//mediapipe/framework:port"] + select({
        "//mediapipe:apple": [
            ":image_to_tensor_converter",
            ":image_to_tensor_utils",
            "//mediapipe/gpu:MPPMetalHelper",
            "@com_google_absl//absl/strings",
            "//mediapipe/framework:calculator_framework",
            "//mediapipe/framework/formats:rect_cc_proto",
            "//mediapipe/framework/formats:tensor",
            "//mediapipe/framework/port:ret_check",
            "//mediapipe/framework/port:status",
            "//mediapipe/framework/port:statusor",
            "//mediapipe/gpu:gpu_buffer",
            "//mediapipe/gpu:gpu_buffer_format",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
            "@org_tensorflow//tensorflow/lite/delegates/gpu/common:types",
        ],
        "//conditions:default": [],
    }),
)

cc_library(
    name = "image_to_tensor_utils",
    srcs = ["image_to_tensor_utils.cc"],
    hdrs = ["image_to_tensor_utils.h"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:statusor",
        "@com_google_absl//absl/types:optional",
    ],
)

cc_test(
    name = "image_to_tensor_utils_test",
    srcs = ["image_to_tensor_utils_test.cc"],
    deps = [
        ":image_to_tensor_utils",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/port:gtest_main",
    ],
)
mediapipe/calculators/tensor/image_to_tensor_calculator.cc (new file)
@ -0,0 +1,275 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <memory>

#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"

#if MEDIAPIPE_METAL_ENABLED
#include "mediapipe/calculators/tensor/image_to_tensor_converter_metal.h"
#include "mediapipe/gpu/MPPMetalHelper.h"
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#else
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#endif  // MEDIAPIPE_METAL_ENABLED

#endif  // !MEDIAPIPE_DISABLE_GPU

namespace {
constexpr char kInputCpu[] = "IMAGE";
constexpr char kInputGpu[] = "IMAGE_GPU";
constexpr char kOutputMatrix[] = "MATRIX";
constexpr char kOutput[] = "TENSORS";
constexpr char kInputNormRect[] = "NORM_RECT";
constexpr char kOutputLetterboxPadding[] = "LETTERBOX_PADDING";
}  // namespace

namespace mediapipe {

// Converts image into Tensor, possibly with cropping, resizing and
// normalization, according to specified inputs and options.
//
// Inputs:
//   IMAGE - ImageFrame [ImageFormat::SRGB/SRGBA]
//     Image to extract from.
//   IMAGE_GPU - GpuBuffer [GpuBufferFormat::kBGRA32]
//     Image to extract from.
//   (Either IMAGE or IMAGE_GPU has to be specified.)
//
//   NORM_RECT - NormalizedRect @Optional
//     Describes region of image to extract.
//     @Optional: rect covering the whole image is used if not specified.
//
// Outputs:
//   TENSORS - std::vector<Tensor>
//     Vector containing a single Tensor populated with an extracted RGB image.
//   MATRIX - std::array<float, 16> @Optional
//     An std::array<float, 16> representing a 4x4 row-major-order matrix which
//     can be used to map a point on the output tensor to a point on the input
//     image.
//   LETTERBOX_PADDING - std::array<float, 4> @Optional
//     An std::array<float, 4> representing the letterbox padding from the 4
//     sides ([left, top, right, bottom]) of the output image, normalized to
//     [0.f, 1.f] by the output dimensions. The padding values are non-zero only
//     when the "keep_aspect_ratio" is true.
//
//     For instance, when the input image is 10x10 (width x height) and the
//     output dimensions specified in the calculator option are 20x40 and
//     "keep_aspect_ratio" is true, the calculator scales the input image to
//     20x20 and places it in the middle of the output image with an equal
//     padding of 10 pixels at the top and the bottom. The resulting array is
//     therefore [0.f, 0.25f, 0.f, 0.25f] (10/40 = 0.25f).
//
// Example:
// node {
//   calculator: "ImageToTensorCalculator"
//   input_stream: "IMAGE:image"  # or "IMAGE_GPU:image"
//   input_stream: "NORM_RECT:roi"
//   output_stream: "TENSORS:tensors"
//   output_stream: "MATRIX:matrix"
//   options {
//     [mediapipe.ImageToTensorCalculatorOptions.ext] {
//       output_tensor_width: 256
//       output_tensor_height: 256
//       keep_aspect_ratio: false
//       output_tensor_float_range {
//         min: 0.0
//         max: 1.0
//       }
//       # gpu_origin: CONVENTIONAL # or TOP_LEFT
//     }
//   }
// }
class ImageToTensorCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    const auto& options =
        cc->Options<mediapipe::ImageToTensorCalculatorOptions>();

    RET_CHECK(options.has_output_tensor_float_range())
        << "Output tensor range is required.";
    RET_CHECK_LT(options.output_tensor_float_range().min(),
                 options.output_tensor_float_range().max())
        << "Valid output tensor range is required.";
    RET_CHECK_GT(options.output_tensor_width(), 0)
        << "Valid output tensor width is required.";
    RET_CHECK_GT(options.output_tensor_height(), 0)
        << "Valid output tensor height is required.";

    if (cc->Inputs().HasTag(kInputNormRect)) {
      cc->Inputs().Tag(kInputNormRect).Set<mediapipe::NormalizedRect>();
    }
    if (cc->Outputs().HasTag(kOutputLetterboxPadding)) {
      cc->Outputs().Tag(kOutputLetterboxPadding).Set<std::array<float, 4>>();
    }
    if (cc->Outputs().HasTag(kOutputMatrix)) {
      cc->Outputs().Tag(kOutputMatrix).Set<std::array<float, 16>>();
    }

    const bool has_cpu_input = cc->Inputs().HasTag(kInputCpu);
    const bool has_gpu_input = cc->Inputs().HasTag(kInputGpu);
    RET_CHECK_EQ((has_cpu_input ? 1 : 0) + (has_gpu_input ? 1 : 0), 1)
        << "Either CPU or GPU input is expected, not both.";

    if (has_cpu_input) {
      cc->Inputs().Tag(kInputCpu).Set<mediapipe::ImageFrame>();
    } else if (has_gpu_input) {
#if MEDIAPIPE_DISABLE_GPU
      return mediapipe::UnimplementedError("GPU processing is disabled");
#else

#if MEDIAPIPE_METAL_ENABLED
      MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#else
      MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif  // MEDIAPIPE_METAL_ENABLED
      cc->Inputs().Tag(kInputGpu).Set<mediapipe::GpuBuffer>();

#endif  // MEDIAPIPE_DISABLE_GPU
    }
    cc->Outputs().Tag(kOutput).Set<std::vector<Tensor>>();
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Open(CalculatorContext* cc) {
    // Makes sure outputs' next timestamp bound update is handled automatically
    // by the framework.
    cc->SetOffset(TimestampDiff(0));
    options_ = cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
    output_width_ = options_.output_tensor_width();
    output_height_ = options_.output_tensor_height();
    range_min_ = options_.output_tensor_float_range().min();
    range_max_ = options_.output_tensor_float_range().max();

    if (cc->Inputs().HasTag(kInputCpu)) {
      ASSIGN_OR_RETURN(converter_, CreateOpenCvConverter(cc));
    } else {
#if MEDIAPIPE_DISABLE_GPU
      return mediapipe::UnimplementedError("GPU processing is disabled");
#else

#if MEDIAPIPE_METAL_ENABLED
      ASSIGN_OR_RETURN(converter_, CreateMetalConverter(cc));
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
      ASSIGN_OR_RETURN(converter_, CreateImageToGlBufferTensorConverter(
                                       cc, DoesInputStartAtBottom()));
#else
      ASSIGN_OR_RETURN(converter_, CreateImageToGlTextureTensorConverter(
                                       cc, DoesInputStartAtBottom()));
#endif  // MEDIAPIPE_METAL_ENABLED

#endif  // MEDIAPIPE_DISABLE_GPU
    }
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) {
    const InputStreamShard& input = cc->Inputs().Tag(
        cc->Inputs().HasTag(kInputCpu) ? kInputCpu : kInputGpu);
    if (input.IsEmpty()) {
      // Timestamp bound update happens automatically. (See Open().)
      return ::mediapipe::OkStatus();
    }

    absl::optional<mediapipe::NormalizedRect> norm_rect;
    if (cc->Inputs().HasTag(kInputNormRect)) {
      if (cc->Inputs().Tag(kInputNormRect).IsEmpty()) {
        // Timestamp bound update happens automatically. (See Open().)
        return ::mediapipe::OkStatus();
      }
      norm_rect =
          cc->Inputs().Tag(kInputNormRect).Get<mediapipe::NormalizedRect>();
      if (norm_rect->width() == 0 && norm_rect->height() == 0) {
        // WORKAROUND: some existing graphs may use sentinel rects {width=0,
        // height=0, ...} quite often and calculator has to handle them
        // gracefully by updating timestamp bound instead of returning failure.
        // Timestamp bound update happens automatically. (See Open().)
        // NOTE: usage of sentinel rects should be avoided.
        DLOG(WARNING)
            << "Updating timestamp bound in response to a sentinel rect";
        return ::mediapipe::OkStatus();
      }
    }

    const Packet& image_packet = input.Value();
    const Size& size = converter_->GetImageSize(image_packet);
    RotatedRect roi = GetRoi(size.width, size.height, norm_rect);
    ASSIGN_OR_RETURN(auto padding, PadRoi(options_.output_tensor_width(),
                                          options_.output_tensor_height(),
                                          options_.keep_aspect_ratio(), &roi));
    if (cc->Outputs().HasTag(kOutputLetterboxPadding)) {
      cc->Outputs()
          .Tag(kOutputLetterboxPadding)
          .AddPacket(MakePacket<std::array<float, 4>>(padding).At(
              cc->InputTimestamp()));
    }
    if (cc->Outputs().HasTag(kOutputMatrix)) {
      std::array<float, 16> matrix;
      GetRotatedSubRectToRectTransformMatrix(roi, size.width, size.height,
                                             /*flip_horizontaly=*/false,
                                             &matrix);
      cc->Outputs()
          .Tag(kOutputMatrix)
          .AddPacket(MakePacket<std::array<float, 16>>(std::move(matrix))
                         .At(cc->InputTimestamp()));
    }

    ASSIGN_OR_RETURN(
        Tensor tensor,
        converter_->Convert(image_packet, roi, {output_width_, output_height_},
                            range_min_, range_max_));

    std::vector<Tensor> result;
    result.push_back(std::move(tensor));
    cc->Outputs().Tag(kOutput).AddPacket(
        MakePacket<std::vector<Tensor>>(std::move(result))
            .At(cc->InputTimestamp()));

    return ::mediapipe::OkStatus();
  }

 private:
  bool DoesInputStartAtBottom() {
    return options_.gpu_origin() != mediapipe::GpuOrigin_Mode_TOP_LEFT;
  }

  std::unique_ptr<ImageToTensorConverter> converter_;
  mediapipe::ImageToTensorCalculatorOptions options_;
  int output_width_ = 0;
  int output_height_ = 0;
  float range_min_ = 0.0f;
  float range_max_ = 1.0f;
};

REGISTER_CALCULATOR(ImageToTensorCalculator);

}  // namespace mediapipe
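The LETTERBOX_PADDING arithmetic described in the calculator comment above reduces to a few lines. The helper below is a self-contained sketch of mine (not the calculator's actual code, which lives in image_to_tensor_utils) that reproduces the 10x10 -> 20x40 worked example.

// Sketch only: normalized letterbox padding when scaling in_w x in_h to fit
// out_w x out_h while preserving aspect ratio.
#include <algorithm>
#include <array>

std::array<float, 4> LetterboxPadding(int in_w, int in_h, int out_w,
                                      int out_h) {
  const float scale = std::min(static_cast<float>(out_w) / in_w,
                               static_cast<float>(out_h) / in_h);
  const float pad_x = (out_w - in_w * scale) / 2.0f / out_w;  // left == right
  const float pad_y = (out_h - in_h * scale) / 2.0f / out_h;  // top == bottom
  return {pad_x, pad_y, pad_x, pad_y};  // [left, top, right, bottom]
}
// LetterboxPadding(10, 10, 20, 40) yields {0.f, 0.25f, 0.f, 0.25f}, matching
// the example in the calculator comment (10/40 = 0.25f).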
@ -0,0 +1,64 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message GpuOrigin {
  enum Mode {
    DEFAULT = 0;

    // OpenGL: bottom-left origin
    // Metal : top-left origin
    CONVENTIONAL = 1;

    // OpenGL: top-left origin
    // Metal : top-left origin
    TOP_LEFT = 2;
  }
}

message ImageToTensorCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional ImageToTensorCalculatorOptions ext = 334361939;
  }

  // Range of float values [min, max].
  // min must be strictly less than max.
  message FloatRange {
    optional float min = 1;
    optional float max = 2;
  }

  optional int32 output_tensor_width = 1;
  optional int32 output_tensor_height = 2;

  // If true, the image region is extracted and copied into the tensor keeping
  // the region aspect ratio, which usually results in letterbox padding.
  // Otherwise, the image region is stretched to fill the output tensor fully.
  optional bool keep_aspect_ratio = 3;

  // Output tensor element range/type image pixels are converted to.
  oneof range {
    FloatRange output_tensor_float_range = 4;
  }

  // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
  // to be flipped vertically as tensors are expected to start at top.
  // (DEFAULT or unset interpreted as CONVENTIONAL.)
  optional GpuOrigin.Mode gpu_origin = 5;
}
mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc (new file)
@ -0,0 +1,262 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {
namespace {

cv::Mat GetRgb(absl::string_view path) {
  cv::Mat bgr = cv::imread(file::JoinPath("./", path));
  cv::Mat rgb;
  cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);
  return rgb;
}

cv::Mat GetRgba(absl::string_view path) {
  cv::Mat bgr = cv::imread(file::JoinPath("./", path));
  cv::Mat rgb;
  cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGBA);
  return rgb;
}

// Image to tensor test template.
// No processing/assertions should be done after the function is invoked.
void RunTest(cv::Mat input, cv::Mat expected_result, float range_min,
             float range_max, int tensor_width, int tensor_height,
             bool keep_aspect, const mediapipe::NormalizedRect& roi) {
  auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
      absl::Substitute(R"(
        input_stream: "input_image"
        input_stream: "roi"
        node {
          calculator: "ImageToTensorCalculator"
          input_stream: "IMAGE:input_image"
          input_stream: "NORM_RECT:roi"
          output_stream: "TENSORS:tensor"
          options {
            [mediapipe.ImageToTensorCalculatorOptions.ext] {
              output_tensor_width: $0
              output_tensor_height: $1
              keep_aspect_ratio: $4
              output_tensor_float_range {
                min: $2
                max: $3
              }
            }
          }
        }
      )",
                       /*$0=*/tensor_width,
                       /*$1=*/tensor_height,
                       /*$2=*/range_min,
                       /*$3=*/range_max,
                       /*$4=*/keep_aspect ? "true" : "false"));

  std::vector<Packet> output_packets;
  tool::AddVectorSink("tensor", &graph_config, &output_packets);

  // Run the graph.
  CalculatorGraph graph;
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));

  ImageFrame input_image(
      input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
      input.cols, input.rows, input.step, input.data, [](uint8*) {});
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "input_image",
      MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "roi",
      MakePacket<mediapipe::NormalizedRect>(std::move(roi)).At(Timestamp(0))));

  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_THAT(output_packets, testing::SizeIs(1));

  // Get and process results.
  const std::vector<Tensor>& tensor_vec =
      output_packets[0].Get<std::vector<Tensor>>();
  ASSERT_THAT(tensor_vec, testing::SizeIs(1));

  const Tensor& tensor = tensor_vec[0];
  EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);

  auto view = tensor.GetCpuReadView();
  cv::Mat tensor_mat(tensor_height, tensor_width, CV_32FC3,
                     const_cast<float*>(view.buffer<float>()));
  cv::Mat result_rgb;
  auto transformation =
      GetValueRangeTransformation(range_min, range_max, 0.0f, 255.0f)
          .ValueOrDie();
  tensor_mat.convertTo(result_rgb, CV_8UC3, transformation.scale,
                       transformation.offset);

  cv::Mat diff;
  cv::absdiff(result_rgb, expected_result, diff);
  double max_val;
  cv::minMaxLoc(diff, nullptr, &max_val);
  // Expects the maximum absolute pixel-by-pixel difference is less than 5.
  EXPECT_LE(max_val, 5);

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("input_image"));
  MP_ASSERT_OK(graph.CloseInputStream("roi"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspect) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.65f);
  roi.set_y_center(0.4f);
  roi.set_width(0.5f);
  roi.set_height(0.5f);
  roi.set_rotation(0);
  RunTest(
      GetRgb("/mediapipe/calculators/"
             "tensor/testdata/image_to_tensor/input.jpg"),
      GetRgb("/mediapipe/calculators/"
             "tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"),
      /*range_min=*/0.0f,
      /*range_max=*/1.0f,
      /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true, roi);
}

TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectWithRotation) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.65f);
  roi.set_y_center(0.4f);
  roi.set_width(0.5f);
  roi.set_height(0.5f);
  roi.set_rotation(M_PI * 90.0f / 180.0f);
  RunTest(GetRgb("/mediapipe/calculators/"
                 "tensor/testdata/image_to_tensor/input.jpg"),
          GetRgb("/mediapipe/calculators/"
                 "tensor/testdata/image_to_tensor/"
                 "medium_sub_rect_keep_aspect_with_rotation.png"),
          /*range_min=*/0.0f, /*range_max=*/1.0f,
          /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
          roi);
}

TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotation) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.65f);
  roi.set_y_center(0.4f);
  roi.set_width(0.5f);
  roi.set_height(0.5f);
  roi.set_rotation(M_PI * -45.0f / 180.0f);
  RunTest(
      GetRgb("/mediapipe/calculators/"
             "tensor/testdata/image_to_tensor/input.jpg"),
      GetRgb(
          "/mediapipe/calculators/"
          "tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"),
      /*range_min=*/-1.0f,
      /*range_max=*/1.0f,
      /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false, roi);
}

TEST(ImageToTensorCalculatorTest, LargeSubRect) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.5f);
  roi.set_y_center(0.5f);
  roi.set_width(1.5f);
  roi.set_height(1.1f);
  roi.set_rotation(0);
  RunTest(GetRgb("/mediapipe/calculators/"
                 "tensor/testdata/image_to_tensor/input.jpg"),
          GetRgb("/mediapipe/calculators/"
                 "tensor/testdata/image_to_tensor/large_sub_rect.png"),
          /*range_min=*/0.0f,
          /*range_max=*/1.0f,
          /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
          roi);
}

TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspect) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.5f);
  roi.set_y_center(0.5f);
  roi.set_width(1.5f);
  roi.set_height(1.1f);
  roi.set_rotation(0);
  RunTest(
      GetRgb("/mediapipe/calculators/"
             "tensor/testdata/image_to_tensor/input.jpg"),
      GetRgb("/mediapipe/calculators/"
             "tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"),
      /*range_min=*/0.0f,
      /*range_max=*/1.0f,
      /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true, roi);
}

TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectWithRotation) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.5f);
  roi.set_y_center(0.5f);
  roi.set_width(1.5f);
  roi.set_height(1.1f);
  roi.set_rotation(M_PI * -15.0f / 180.0f);
  RunTest(GetRgba("/mediapipe/calculators/"
                  "tensor/testdata/image_to_tensor/input.jpg"),
          GetRgb("/mediapipe/calculators/"
                 "tensor/testdata/image_to_tensor/"
                 "large_sub_rect_keep_aspect_with_rotation.png"),
          /*range_min=*/0.0f,
          /*range_max=*/1.0f,
          /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
          roi);
}

TEST(ImageToTensorCalculatorTest, NoOpExceptRange) {
  mediapipe::NormalizedRect roi;
  roi.set_x_center(0.5f);
  roi.set_y_center(0.5f);
  roi.set_width(1.0f);
  roi.set_height(1.0f);
  roi.set_rotation(0);
  RunTest(GetRgba("/mediapipe/calculators/"
                  "tensor/testdata/image_to_tensor/input.jpg"),
          GetRgb("/mediapipe/calculators/"
                 "tensor/testdata/image_to_tensor/noop_except_range.png"),
          /*range_min=*/0.0f,
          /*range_max=*/1.0f,
          /*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
          roi);
}

}  // namespace
}  // namespace mediapipe
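The GetValueRangeTransformation call in the test above recovers a linear map between value ranges. The sketch below is my illustration of the scale/offset derivation for mapping tensor values in [range_min, range_max] back to pixel values in [0, 255]; it is not the utility's actual implementation, which lives in image_to_tensor_utils.

// Sketch only: pixel = value * scale + offset.
struct ValueTransformation {
  float scale;
  float offset;
};

ValueTransformation TensorRangeToPixels(float range_min, float range_max) {
  const float scale = 255.0f / (range_max - range_min);
  return {scale, -range_min * scale};
}
// TensorRangeToPixels(-1.0f, 1.0f) yields scale = 127.5f, offset = 127.5f,
// so -1 maps to 0, 0 maps to 127.5 and 1 maps to 255, undoing the test's
// output_tensor_float_range normalization.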
mediapipe/calculators/tensor/image_to_tensor_converter.h (new file)
@ -0,0 +1,53 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_H_

#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

struct Size {
  int width;
  int height;
};

// Converts image to tensor.
class ImageToTensorConverter {
 public:
  virtual ~ImageToTensorConverter() = default;

  virtual Size GetImageSize(const Packet& image_packet) = 0;

  // Converts image to tensor.
  // @image_packet contains image to extract from.
  // @roi describes region of interest within the image to extract (absolute
  // values).
  // @output_dims dimensions of output tensor.
  // @range_min/max describes the output tensor range image pixels should be
  // converted to.
  virtual ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
                                                const RotatedRect& roi,
                                                const Size& output_dims,
                                                float range_min,
                                                float range_max) = 0;
};

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_H_
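To make the contract concrete, here is a minimal stub of mine that implements the interface above; the real converters in this change (OpenCV, GL buffer/texture, Metal) do actual cropping, resizing and value rescaling. The ImageFrame accessors and the Tensor constructor used here are assumptions drawn from the headers included elsewhere in this change.

// Illustrative stub only: satisfies the ImageToTensorConverter contract but
// returns an uninitialized tensor instead of performing a real conversion.
class NoOpConverter : public ImageToTensorConverter {
 public:
  Size GetImageSize(const Packet& image_packet) override {
    const auto& frame = image_packet.Get<ImageFrame>();
    return {frame.Width(), frame.Height()};
  }

  ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
                                        const RotatedRect& roi,
                                        const Size& output_dims,
                                        float range_min,
                                        float range_max) override {
    // A real converter would crop @roi, resize to @output_dims and rescale
    // pixel values into [range_min, range_max]; this one only allocates.
    return Tensor(Tensor::ElementType::kFloat32,
                  Tensor::Shape{1, output_dims.height, output_dims.width, 3});
  }
};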
340
mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.cc
Normal file
@ -0,0 +1,340 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.h"

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

#include <array>
#include <memory>
#include <vector>

#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/command_queue.h"
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h"
#include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h"
#include "tensorflow/lite/delegates/gpu/gl/variable.h"

namespace mediapipe {

namespace {

// Implements a common pattern of extracting a subrect from an RGBA input
// texture and resizing it into a buffer.
class SubRectExtractorGl {
 public:
  // Extracts a region defined by @sub_rect, removes the A channel, transforms
  // input pixels as alpha * x + beta, and resizes the result into destination.
  ::mediapipe::Status ExtractSubRectToBuffer(
      const tflite::gpu::gl::GlTexture& texture,
      const tflite::gpu::HW& texture_size, const RotatedRect& sub_rect,
      bool flip_horizontaly, float alpha, float beta,
      const tflite::gpu::HW& destination_size,
      tflite::gpu::gl::CommandQueue* command_queue,
      tflite::gpu::gl::GlBuffer* destination);

  static ::mediapipe::StatusOr<SubRectExtractorGl> Create(
      bool input_starts_at_bottom);

 private:
  explicit SubRectExtractorGl(tflite::gpu::gl::GlProgram program,
                              tflite::gpu::uint3 workgroup_size)
      : program_(std::move(program)), workgroup_size_(workgroup_size) {}

  tflite::gpu::gl::GlProgram program_;
  tflite::gpu::uint3 workgroup_size_;
};

::mediapipe::Status SetMat4x4(const tflite::gpu::gl::GlProgram& program,
                              const std::string& name, float* data) {
  GLint uniform_id;
  MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glGetUniformLocation, &uniform_id,
                                        program.id(), name.c_str()));
  return TFLITE_GPU_CALL_GL(glProgramUniformMatrix4fv, program.id(),
                            uniform_id, 1, GL_TRUE, data);
}

class GlParametersOverride {
 public:
  static ::mediapipe::StatusOr<GlParametersOverride> Create(
      const std::vector<std::pair<GLenum, GLint>>& overrides) {
    std::vector<GLint> old_values(overrides.size());
    for (int i = 0; i < overrides.size(); ++i) {
      MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glGetTexParameteriv, GL_TEXTURE_2D,
                                            overrides[i].first,
                                            &old_values[i]));
      if (overrides[i].second != old_values[i]) {
        MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glTexParameteri, GL_TEXTURE_2D,
                                              overrides[i].first,
                                              overrides[i].second));
      }
    }
    return GlParametersOverride(overrides, std::move(old_values));
  }

  ::mediapipe::Status Revert() {
    for (int i = 0; i < overrides_.size(); ++i) {
      if (overrides_[i].second != old_values_[i]) {
        MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glTexParameteri, GL_TEXTURE_2D,
                                              overrides_[i].first,
                                              old_values_[i]));
      }
    }
    return ::mediapipe::OkStatus();
  }

 private:
  GlParametersOverride(const std::vector<std::pair<GLenum, GLint>>& overrides,
                       std::vector<GLint> old_values)
      : overrides_(overrides), old_values_(std::move(old_values)) {}

  std::vector<std::pair<GLenum, GLint>> overrides_;
  std::vector<GLint> old_values_;
};

constexpr char kShaderCode[] = R"(
layout(std430) buffer;

precision highp float;

// It is possible to use "vec3 elements[];" here, however due to alignment
// requirements it works only when the "packed" layout is used. The "packed"
// layout is determined by the implementation, and it's expected that the
// OpenGL API is used to query the layout. Favoring a float array over vec3:
// performance is comparable, the layout is the same, and there's no need for
// layout querying (even though it's not quite needed here as there's only one
// member).
layout(binding = 0) writeonly buffer B0 {
  float elements[];
} output_data;

uniform ivec2 out_size;
uniform float alpha;
uniform float beta;
uniform mat4 transform_matrix;
uniform mediump sampler2D input_data;

void main() {
  int out_width = out_size.x;
  int out_height = out_size.y;

  ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
  if (gid.x >= out_width || gid.y >= out_height) {
    return;
  }

  // transform from image.width, image.height range to [0, 1]
  float normal_x = (float(gid.x) + 0.5f) / float(out_width);
  float normal_y = (float(gid.y) + 0.5f) / float(out_height);
  vec4 tc = vec4(normal_x, normal_y, 0.0, 1.0);

  // Apply transformation from roi coordinates to original image coordinates.
  tc = transform_matrix * tc;
#ifdef INPUT_STARTS_AT_BOTTOM
  // The OpenGL texture sampler has its origin in the lower left corner,
  // so we invert the y coordinate.
  tc.y = 1.0f - tc.y;
#endif  // INPUT_STARTS_AT_BOTTOM
  vec4 src_value = alpha * texture(input_data, tc.xy) + beta;

  int linear_index = gid.y * out_width + gid.x;

  // output_data.elements is populated as though it contains vec3 elements.
  int first_component_index = 3 * linear_index;
  output_data.elements[first_component_index] = src_value.r;
  output_data.elements[first_component_index + 1] = src_value.g;
  output_data.elements[first_component_index + 2] = src_value.b;
}
)";

::mediapipe::Status SubRectExtractorGl::ExtractSubRectToBuffer(
    const tflite::gpu::gl::GlTexture& texture,
    const tflite::gpu::HW& texture_size, const RotatedRect& texture_sub_rect,
    bool flip_horizontaly, float alpha, float beta,
    const tflite::gpu::HW& destination_size,
    tflite::gpu::gl::CommandQueue* command_queue,
    tflite::gpu::gl::GlBuffer* destination) {
  std::array<float, 16> transform_mat;
  GetRotatedSubRectToRectTransformMatrix(texture_sub_rect, texture_size.w,
                                         texture_size.h, flip_horizontaly,
                                         &transform_mat);
  MP_RETURN_IF_ERROR(texture.BindAsSampler2D(0));

  ASSIGN_OR_RETURN(auto overrides, GlParametersOverride::Create(
                                       {{GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE},
                                        {GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE},
                                        {GL_TEXTURE_MIN_FILTER, GL_LINEAR},
                                        {GL_TEXTURE_MAG_FILTER, GL_LINEAR}}));

  MP_RETURN_IF_ERROR(destination->BindToIndex(0));
  MP_RETURN_IF_ERROR(program_.SetParameter({"input_data", 0}));
  MP_RETURN_IF_ERROR(
      SetMat4x4(program_, "transform_matrix", transform_mat.data()));
  MP_RETURN_IF_ERROR(program_.SetParameter(
      {"out_size",
       tflite::gpu::int2(destination_size.w, destination_size.h)}));
  MP_RETURN_IF_ERROR(program_.SetParameter({"alpha", alpha}));
  MP_RETURN_IF_ERROR(program_.SetParameter({"beta", beta}));
  tflite::gpu::uint3 num_workgroups = tflite::gpu::DivideRoundUp(
      tflite::gpu::uint3{destination_size.w, destination_size.h, 1},
      workgroup_size_);
  MP_RETURN_IF_ERROR(command_queue->Dispatch(program_, num_workgroups));

  return overrides.Revert();
}

::mediapipe::StatusOr<SubRectExtractorGl> SubRectExtractorGl::Create(
    bool input_starts_at_bottom) {
  const tflite::gpu::uint3 workgroup_size = {8, 8, 1};
  std::string starts_at_bottom_def;
  if (input_starts_at_bottom) {
    starts_at_bottom_def = R"(
      #define INPUT_STARTS_AT_BOTTOM
    )";
  }
  const std::string full_shader_source =
      absl::StrCat(tflite::gpu::gl::GetShaderHeader(workgroup_size),
                   starts_at_bottom_def, kShaderCode);

  tflite::gpu::gl::GlShader shader;
  MP_RETURN_IF_ERROR(tflite::gpu::gl::GlShader::CompileShader(
      GL_COMPUTE_SHADER, full_shader_source, &shader));
  tflite::gpu::gl::GlProgram program;
  MP_RETURN_IF_ERROR(
      tflite::gpu::gl::GlProgram::CreateWithShader(shader, &program));

  return SubRectExtractorGl(std::move(program), workgroup_size);
}

class GlProcessor : public ImageToTensorConverter {
 public:
  ::mediapipe::Status Init(CalculatorContext* cc,
                           bool input_starts_at_bottom) {
    MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
    return gl_helper_.RunInGlContext(
        [this, input_starts_at_bottom]() -> ::mediapipe::Status {
          tflite::gpu::GpuInfo gpu_info;
          MP_RETURN_IF_ERROR(tflite::gpu::gl::RequestGpuInfo(&gpu_info));
          RET_CHECK(tflite::gpu::IsOpenGl31OrAbove(gpu_info))
              << "OpenGL ES 3.1 is required.";
          command_queue_ = tflite::gpu::gl::NewCommandQueue(gpu_info);

          ASSIGN_OR_RETURN(auto extractor,
                           SubRectExtractorGl::Create(input_starts_at_bottom));
          extractor_ =
              absl::make_unique<SubRectExtractorGl>(std::move(extractor));
          return ::mediapipe::OkStatus();
        });
  }

  Size GetImageSize(const Packet& image_packet) override {
    const auto& image = image_packet.Get<mediapipe::GpuBuffer>();
    return {image.width(), image.height()};
  }

  ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
                                        const RotatedRect& roi,
                                        const Size& output_dims,
                                        float range_min,
                                        float range_max) override {
    const auto& input = image_packet.Get<mediapipe::GpuBuffer>();
    if (input.format() != mediapipe::GpuBufferFormat::kBGRA32) {
      return InvalidArgumentError(
          absl::StrCat("Only BGRA/RGBA textures are supported, passed format: ",
                       static_cast<uint32_t>(input.format())));
    }

    constexpr int kNumChannels = 3;
    Tensor tensor(Tensor::ElementType::kFloat32,
                  {1, output_dims.height, output_dims.width, kNumChannels});

    MP_RETURN_IF_ERROR(gl_helper_.RunInGlContext(
        [this, &tensor, &input, &roi, &output_dims, range_min,
         range_max]() -> ::mediapipe::Status {
          constexpr int kRgbaNumChannels = 4;
          auto source_texture = gl_helper_.CreateSourceTexture(input);
          tflite::gpu::gl::GlTexture input_texture(
              GL_TEXTURE_2D, source_texture.name(), GL_RGBA,
              source_texture.width() * source_texture.height() *
                  kRgbaNumChannels * sizeof(uint8_t),
              /*layer=*/0,
              /*owned=*/false);

          constexpr float kInputImageRangeMin = 0.0f;
          constexpr float kInputImageRangeMax = 1.0f;
          ASSIGN_OR_RETURN(auto transform,
                           GetValueRangeTransformation(kInputImageRangeMin,
                                                       kInputImageRangeMax,
                                                       range_min, range_max));

          auto buffer_view = tensor.GetOpenGlBufferWriteView();
          tflite::gpu::gl::GlBuffer output(GL_SHADER_STORAGE_BUFFER,
                                           buffer_view.name(), tensor.bytes(),
                                           /*offset=*/0,
                                           /*has_ownership=*/false);
          MP_RETURN_IF_ERROR(extractor_->ExtractSubRectToBuffer(
              input_texture,
              tflite::gpu::HW(source_texture.height(), source_texture.width()),
              roi,
              /*flip_horizontaly=*/false, transform.scale, transform.offset,
              tflite::gpu::HW(output_dims.height, output_dims.width),
              command_queue_.get(), &output));

          return ::mediapipe::OkStatus();
        }));

    return tensor;
  }

  ~GlProcessor() override {
    gl_helper_.RunInGlContext([this]() {
      // Release OpenGL resources.
      extractor_ = nullptr;
      command_queue_ = nullptr;
    });
  }

 private:
  std::unique_ptr<tflite::gpu::gl::CommandQueue> command_queue_;
  std::unique_ptr<SubRectExtractorGl> extractor_;
  mediapipe::GlCalculatorHelper gl_helper_;
};

}  // namespace

::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlBufferTensorConverter(CalculatorContext* cc,
                                     bool input_starts_at_bottom) {
  auto result = absl::make_unique<GlProcessor>();
  MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom));

  // Simply "return std::move(result)" failed to build on macOS with bazel.
  return std::unique_ptr<ImageToTensorConverter>(std::move(result));
}

}  // namespace mediapipe

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
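A note on the shader comment about "float elements[]" versus "vec3 elements[]": under the std430 layout used here, a vec3 array element is aligned like a vec4, so each element would carry four bytes of padding and no longer match the tightly packed float tensor the buffer is bound to. This is background on the design choice, based on the std430 alignment rules (an assumption about the spec, not code from this change); a sketch of the two layouts:

// Illustration only: the two SSBO declarations below are NOT equivalent in
// size. With std430, vec3 array elements are aligned like vec4, so `rgb[]`
// has a 16-byte stride, while `elements[]` has a 4-byte stride and maps 1:1
// onto the packed float32 NHWC tensor this buffer is bound to.
constexpr char kLayoutComparison[] = R"(
layout(std430, binding = 0) buffer TightlyPacked {
  float elements[];  // stride 4: channel c of pixel i is elements[3 * i + c]
};
layout(std430, binding = 1) buffer Padded {
  vec3 rgb[];        // stride 16: one padding float per element
};
)";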
41
mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.h
Normal file
@ -0,0 +1,41 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_BUFFER_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_BUFFER_H_

#include "mediapipe/framework/port.h"

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

#include <memory>

#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

// Creates an image-to-tensor converter whose tensor is represented as an
// OpenGL buffer.
// NOTE: mediapipe::GlCalculatorHelper::UpdateContract invocation must precede
// converter creation.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlBufferTensorConverter(CalculatorContext* cc,
                                     bool input_starts_at_bottom);

}  // namespace mediapipe

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_BUFFER_H_
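The NOTE above matters in practice: GlCalculatorHelper::UpdateContract has to run in GetContract before the converter factory is used in Open. A hedged sketch of that ordering (the calculator name, stream tags, and single-tensor output are illustrative assumptions, not part of this change):

// Sketch only: illustrates the required UpdateContract -> factory ordering.
class MyGpuImageToTensorCalculator : public mediapipe::CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(mediapipe::CalculatorContract* cc) {
    cc->Inputs().Tag("IMAGE_GPU").Set<mediapipe::GpuBuffer>();
    cc->Outputs().Tag("TENSOR").Set<mediapipe::Tensor>();
    // Must precede converter creation, per the NOTE above.
    return mediapipe::GlCalculatorHelper::UpdateContract(cc);
  }

  ::mediapipe::Status Open(mediapipe::CalculatorContext* cc) override {
    ASSIGN_OR_RETURN(converter_, CreateImageToGlBufferTensorConverter(
                                     cc, /*input_starts_at_bottom=*/false));
    return ::mediapipe::OkStatus();
  }

 private:
  std::unique_ptr<mediapipe::ImageToTensorConverter> converter_;
};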
323
mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.cc
Normal file
@ -0,0 +1,323 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"

#include "mediapipe/framework/port.h"

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20

#include <array>
#include <memory>
#include <vector>

#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/shader_util.h"

namespace mediapipe {

namespace {

class GlParametersOverride {
 public:
  static ::mediapipe::StatusOr<GlParametersOverride> Create(
      const std::vector<std::pair<GLenum, GLint>>& overrides) {
    std::vector<GLint> old_values(overrides.size());
    for (int i = 0; i < overrides.size(); ++i) {
      glGetTexParameteriv(GL_TEXTURE_2D, overrides[i].first, &old_values[i]);
      if (overrides[i].second != old_values[i]) {
        glTexParameteri(GL_TEXTURE_2D, overrides[i].first,
                        overrides[i].second);
      }
    }
    return GlParametersOverride(overrides, std::move(old_values));
  }

  ::mediapipe::Status Revert() {
    for (int i = 0; i < overrides_.size(); ++i) {
      if (overrides_[i].second != old_values_[i]) {
        glTexParameteri(GL_TEXTURE_2D, overrides_[i].first, old_values_[i]);
      }
    }
    return ::mediapipe::OkStatus();
  }

 private:
  GlParametersOverride(const std::vector<std::pair<GLenum, GLint>>& overrides,
                       std::vector<GLint> old_values)
      : overrides_(overrides), old_values_(std::move(old_values)) {}

  std::vector<std::pair<GLenum, GLint>> overrides_;
  std::vector<GLint> old_values_;
};

constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;

class GlProcessor : public ImageToTensorConverter {
 public:
  ::mediapipe::Status Init(CalculatorContext* cc,
                           bool input_starts_at_bottom) {
    MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
    return gl_helper_.RunInGlContext([this, input_starts_at_bottom]()
                                         -> ::mediapipe::Status {
      const GLint attr_location[kNumAttributes] = {
          kAttribVertex,
          kAttribTexturePosition,
      };
      const GLchar* attr_name[kNumAttributes] = {
          "position",
          "texture_coordinate",
      };

      constexpr GLchar kExtractSubRectVertexShader[] = R"(
        in vec4 position;
        in mediump vec4 texture_coordinate;
        out mediump vec2 sample_coordinate;
        uniform mat4 transform_matrix;

        void main() {
          gl_Position = position;
          // Apply transformation from roi coordinates to original image
          // coordinates.
          vec4 tc = transform_matrix * texture_coordinate;
        #ifdef INPUT_STARTS_AT_BOTTOM
          // The OpenGL texture sampler has its origin in the lower left
          // corner, so we invert the y coordinate.
          tc.y = 1.0 - tc.y;
        #endif  // defined(INPUT_STARTS_AT_BOTTOM)
          sample_coordinate = tc.xy;
        }
      )";

      constexpr GLchar kExtractSubRectFragBody[] = R"(
        DEFAULT_PRECISION(mediump, float)

        // Provided by kExtractSubRectVertexShader.
        in vec2 sample_coordinate;

        uniform sampler2D input_texture;
        uniform float alpha;
        uniform float beta;

        #ifdef GL_ES
          #define fragColor gl_FragColor
        #else
          out vec4 fragColor;
        #endif  // defined(GL_ES)

        void main() {
          fragColor = alpha * texture2D(input_texture, sample_coordinate) + beta;
        }
      )";

      std::string starts_at_bottom_def;
      if (input_starts_at_bottom) {
        starts_at_bottom_def = R"(
          #define INPUT_STARTS_AT_BOTTOM
        )";
      }

      // Create program and set parameters.
      const std::string extract_sub_rect_vertex_src =
          absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble,
                       starts_at_bottom_def, kExtractSubRectVertexShader);
      const std::string extract_sub_rect_frag_src = absl::StrCat(
          mediapipe::kMediaPipeFragmentShaderPreamble, kExtractSubRectFragBody);
      mediapipe::GlhCreateProgram(extract_sub_rect_vertex_src.c_str(),
                                  extract_sub_rect_frag_src.c_str(),
                                  kNumAttributes, &attr_name[0], attr_location,
                                  &program_);

      RET_CHECK(program_) << "Problem initializing image to tensor program.";
      glUseProgram(program_);
      glUniform1i(glGetUniformLocation(program_, "input_texture"), 1);
      alpha_id_ = glGetUniformLocation(program_, "alpha");
      beta_id_ = glGetUniformLocation(program_, "beta");
      matrix_id_ = glGetUniformLocation(program_, "transform_matrix");

      glGenFramebuffers(1, &framebuffer_);

      // vertex storage
      glGenBuffers(2, vbo_);
      glGenVertexArrays(1, &vao_);

      // vbo 0
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
                   mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);

      // vbo 1
      glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
      glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
                   mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);

      glBindBuffer(GL_ARRAY_BUFFER, 0);

      return ::mediapipe::OkStatus();
    });
  }

  Size GetImageSize(const Packet& image_packet) override {
    const auto& image = image_packet.Get<mediapipe::GpuBuffer>();
    return {image.width(), image.height()};
  }

  ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
                                        const RotatedRect& roi,
                                        const Size& output_dims,
                                        float range_min,
                                        float range_max) override {
    const auto& input = image_packet.Get<mediapipe::GpuBuffer>();
    if (input.format() != mediapipe::GpuBufferFormat::kBGRA32) {
      return InvalidArgumentError(
          absl::StrCat("Only BGRA/RGBA textures are supported, passed format: ",
                       static_cast<uint32_t>(input.format())));
    }

    constexpr int kNumChannels = 3;
    Tensor tensor(
        Tensor::ElementType::kFloat32,
        Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});

    MP_RETURN_IF_ERROR(gl_helper_.RunInGlContext(
        [this, &tensor, &input, &roi, &output_dims, range_min,
         range_max]() -> ::mediapipe::Status {
          auto input_texture = gl_helper_.CreateSourceTexture(input);

          constexpr float kInputImageRangeMin = 0.0f;
          constexpr float kInputImageRangeMax = 1.0f;
          ASSIGN_OR_RETURN(auto transform,
                           GetValueRangeTransformation(kInputImageRangeMin,
                                                       kInputImageRangeMax,
                                                       range_min, range_max));
          auto tensor_view = tensor.GetOpenGlTexture2dWriteView();
          MP_RETURN_IF_ERROR(ExtractSubRect(input_texture, roi,
                                            /*flip_horizontaly=*/false,
                                            transform.scale, transform.offset,
                                            output_dims, &tensor_view));
          return ::mediapipe::OkStatus();
        }));

    return tensor;
  }

  ::mediapipe::Status ExtractSubRect(const mediapipe::GlTexture& texture,
                                     const RotatedRect& sub_rect,
                                     bool flip_horizontaly, float alpha,
                                     float beta, const Size& output_dims,
                                     Tensor::OpenGlTexture2dView* output) {
    std::array<float, 16> transform_mat;
    GetRotatedSubRectToRectTransformMatrix(sub_rect, texture.width(),
                                           texture.height(), flip_horizontaly,
                                           &transform_mat);

    glDisable(GL_DEPTH_TEST);
    glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
    glViewport(0, 0, output_dims.width, output_dims.height);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, output->name());
    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           output->name(), 0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(texture.target(), texture.name());

    ASSIGN_OR_RETURN(auto overrides, GlParametersOverride::Create(
                                         {{GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE},
                                          {GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE},
                                          {GL_TEXTURE_MIN_FILTER, GL_LINEAR},
                                          {GL_TEXTURE_MAG_FILTER, GL_LINEAR}}));

    glUseProgram(program_);
    glUniform1f(alpha_id_, alpha);
    glUniform1f(beta_id_, beta);
    glUniformMatrix4fv(matrix_id_, 1, GL_TRUE, transform_mat.data());

    // vao
    glBindVertexArray(vao_);

    // vbo 0
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
    glEnableVertexAttribArray(kAttribVertex);
    glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);

    // vbo 1
    glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
    glEnableVertexAttribArray(kAttribTexturePosition);
    glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);

    // draw
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

    // cleanup
    glDisableVertexAttribArray(kAttribVertex);
    glDisableVertexAttribArray(kAttribTexturePosition);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindVertexArray(0);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, 0);

    return overrides.Revert();
  }

  ~GlProcessor() override {
    gl_helper_.RunInGlContext([this]() {
      // Release OpenGL resources.
      if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
      if (program_ != 0) glDeleteProgram(program_);
      if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
      glDeleteBuffers(2, vbo_);
    });
  }

 private:
  mediapipe::GlCalculatorHelper gl_helper_;
  GLuint vao_ = 0;
  GLuint vbo_[2] = {0, 0};
  GLuint program_ = 0;
  GLuint framebuffer_ = 0;
  GLint alpha_id_ = 0;
  GLint beta_id_ = 0;
  GLint matrix_id_ = 0;
};

}  // namespace

::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
                                      bool input_starts_at_bottom) {
  auto result = absl::make_unique<GlProcessor>();
  MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom));

  // Simply "return std::move(result)" failed to build on macOS with bazel.
  return std::unique_ptr<ImageToTensorConverter>(std::move(result));
}

}  // namespace mediapipe

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
42
mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h
Normal file
@ -0,0 +1,42 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_

#include "mediapipe/framework/port.h"

#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20

#include <memory>

#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

// Creates an image-to-tensor converter whose tensor is represented as an
// OpenGL texture.
// NOTE: mediapipe::GlCalculatorHelper::UpdateContract invocation must precede
// converter creation.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
                                      bool input_starts_at_bottom);

}  // namespace mediapipe

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20

#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_
397
mediapipe/calculators/tensor/image_to_tensor_converter_metal.cc
Normal file
@ -0,0 +1,397 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/image_to_tensor_converter_metal.h"

#if MEDIAPIPE_METAL_ENABLED

#import <Metal/Metal.h>

#include <array>
#include <memory>
#include <vector>

#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/MPPMetalHelper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"

namespace mediapipe {

namespace {

// clang-format off
// a square formed by 2 triangles
const float kBasicSquareVertices[] = {
    -1, 1, 0, 1,
    1, 1, 0, 1,
    1, -1, 0, 1,
    -1, 1, 0, 1,
    1, -1, 0, 1,
    -1, -1, 0, 1,
};

// maps a texture to kBasicSquareVertices via aspect fill
const float kBasicTextureVertices[] = {
    0, 0, 0, 1,
    1, 0, 0, 1,
    1, 1, 0, 1,
    0, 0, 0, 1,
    1, 1, 0, 1,
    0, 1, 0, 1,
};
// clang-format on

constexpr char kShaderLibHeader[] = R"(
#include <metal_stdlib>

using namespace metal;

struct TextureVertex
{
  float4 position [[position]];
  float2 uv;
};
)";

constexpr char kVertexShader[] = R"(
vertex TextureVertex vertexShader(
    constant float4 *position [[buffer(0)]],
    device float4* tex_coords [[buffer(1)]],
    constant float4x4& transform_matrix [[buffer(2)]],
    uint vid [[vertex_id]]) {
  TextureVertex vert;
  vert.position = position[vid];
  vert.uv = (tex_coords[vid] * transform_matrix).xy;
  return vert;
}
)";

constexpr char kFragmentShader[] = R"(
#ifdef OUTPUT_F16C4
#define Type4 half4
#define Type half
#endif  // OUTPUT_F16C4

#ifdef OUTPUT_F32C4
#define Type4 float4
#define Type float
#endif  // OUTPUT_F32C4

fragment Type4 fragmentShader(TextureVertex vertex_output [[stage_in]],
                              texture2d<Type> texture [[texture(0)]],
                              constant float* parameters [[buffer(1)]])
{
  const float alpha = parameters[0];
  const float beta = parameters[1];

  constexpr sampler linear_sampler(address::clamp_to_edge, min_filter::linear,
                                   mag_filter::linear);

  Type4 texture_pixel = texture.sample(linear_sampler, vertex_output.uv);
  return Type4(alpha * texture_pixel.rgb + beta, 0);
}
)";

enum class OutputFormat { kF16C4, kF32C4 };

MTLPixelFormat GetPixelFormat(OutputFormat output_format) {
  switch (output_format) {
    case OutputFormat::kF16C4:
      return MTLPixelFormatRGBA16Float;
    case OutputFormat::kF32C4:
      return MTLPixelFormatRGBA32Float;
  }
}
int GetBytesPerRaw(OutputFormat output_format, const tflite::gpu::HW& size) {
  std::size_t type_size;
  switch (output_format) {
    case OutputFormat::kF16C4:
      type_size = sizeof(tflite::gpu::HalfBits);
      break;
    case OutputFormat::kF32C4:
      type_size = sizeof(float);
      break;
  }
  constexpr int kNumChannels = 4;
  return size.w * kNumChannels * type_size;
}

class SubRectExtractorMetal {
 public:
  static ::mediapipe::StatusOr<std::unique_ptr<SubRectExtractorMetal>> Make(
      id<MTLDevice> device, OutputFormat output_format) {
    id<MTLRenderPipelineState> pipeline_state;
    MP_RETURN_IF_ERROR(SubRectExtractorMetal::MakePipelineState(
        device, output_format, &pipeline_state));

    return absl::make_unique<SubRectExtractorMetal>(device, pipeline_state,
                                                    output_format);
  }

  SubRectExtractorMetal(id<MTLDevice> device,
                        id<MTLRenderPipelineState> pipeline_state,
                        OutputFormat output_format)
      : device_(device),
        pipeline_state_(pipeline_state),
        output_format_(output_format) {
    positions_buffer_ =
        [device_ newBufferWithBytes:kBasicSquareVertices
                             length:sizeof(kBasicSquareVertices)
                            options:MTLResourceOptionCPUCacheModeDefault];

    tex_coords_buffer_ =
        [device_ newBufferWithBytes:kBasicTextureVertices
                             length:sizeof(kBasicTextureVertices)
                            options:MTLResourceOptionCPUCacheModeDefault];

    transform_mat_buffer_ =
        [device_ newBufferWithBytes:&transform_mat_
                             length:sizeof(transform_mat_)
                            options:MTLResourceOptionCPUCacheModeDefault];
  }

  ::mediapipe::Status Execute(id<MTLTexture> input_texture,
                              const RotatedRect& sub_rect,
                              bool flip_horizontaly, float alpha, float beta,
                              const tflite::gpu::HW& destination_size,
                              id<MTLCommandBuffer> command_buffer,
                              id<MTLBuffer> destination) {
    auto output_texture = MTLTextureWithBuffer(destination_size, destination);
    return InternalExecute(input_texture, sub_rect, flip_horizontaly, alpha,
                           beta, destination_size, command_buffer,
                           output_texture);
  }

 private:
  id<MTLTexture> MTLTextureWithBuffer(const tflite::gpu::HW& size,
                                      id<MTLBuffer> buffer) {
    MTLTextureDescriptor* texture_desc = [MTLTextureDescriptor
        texture2DDescriptorWithPixelFormat:GetPixelFormat(output_format_)
                                     width:size.w
                                    height:size.h
                                 mipmapped:NO];
    texture_desc.usage = MTLTextureUsageRenderTarget;

    NSUInteger output_bytes_per_row = GetBytesPerRaw(output_format_, size);

    id<MTLTexture> texture =
        [buffer newTextureWithDescriptor:texture_desc
                                  offset:0
                             bytesPerRow:output_bytes_per_row];
    return texture;
  }

  ::mediapipe::Status InternalExecute(id<MTLTexture> input_texture,
                                      const RotatedRect& sub_rect,
                                      bool flip_horizontaly, float alpha,
                                      float beta,
                                      const tflite::gpu::HW& destination_size,
                                      id<MTLCommandBuffer> command_buffer,
                                      id<MTLTexture> output_texture) {
    RET_CHECK(command_buffer != nil);
    RET_CHECK(output_texture != nil);

    // Obtain the texture mapping coordinates transformation matrix and copy
    // its data to the buffer.
    GetRotatedSubRectToRectTransformMatrix(sub_rect, input_texture.width,
                                           input_texture.height,
                                           flip_horizontaly, &transform_mat_);
    std::memcpy(reinterpret_cast<float*>(transform_mat_buffer_.contents),
                transform_mat_.data(), sizeof(transform_mat_));

    // Create parameters wrapper.
    float parameters[] = {alpha, beta};

    // Now everything is ready to go!
    // Setup render pass.
    MTLRenderPassDescriptor* render_pass_desc =
        [MTLRenderPassDescriptor renderPassDescriptor];
    render_pass_desc.colorAttachments[0].texture = output_texture;
    render_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore;
    render_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear;

    // Setup render command encoder.
    id<MTLRenderCommandEncoder> command_encoder =
        [command_buffer renderCommandEncoderWithDescriptor:render_pass_desc];
    [command_encoder setRenderPipelineState:pipeline_state_];
    [command_encoder setVertexBuffer:positions_buffer_ offset:0 atIndex:0];
    [command_encoder setVertexBuffer:tex_coords_buffer_ offset:0 atIndex:1];
    [command_encoder setVertexBuffer:transform_mat_buffer_ offset:0 atIndex:2];
    [command_encoder setFragmentTexture:input_texture atIndex:0];
    [command_encoder setFragmentBytes:&parameters
                               length:sizeof(parameters)
                              atIndex:1];

    [command_encoder drawPrimitives:MTLPrimitiveTypeTriangle
                        vertexStart:0
                        vertexCount:6];
    [command_encoder endEncoding];

    return ::mediapipe::OkStatus();
  }

  static ::mediapipe::Status MakePipelineState(
      id<MTLDevice> device, OutputFormat output_format,
      id<MTLRenderPipelineState>* pipeline_state) {
    RET_CHECK(pipeline_state != nil);

    std::string output_type_def;
    MTLPixelFormat pixel_format;
    switch (output_format) {
      case OutputFormat::kF16C4:
        output_type_def = R"(
          #define OUTPUT_F16C4
        )";
        break;
      case OutputFormat::kF32C4:
        output_type_def = R"(
          #define OUTPUT_F32C4
        )";
        break;
    }

    std::string shader_lib = absl::StrCat(kShaderLibHeader, output_type_def,
                                          kVertexShader, kFragmentShader);
    NSError* error = nil;
    NSString* library_source =
        [NSString stringWithUTF8String:shader_lib.c_str()];

    id<MTLLibrary> library =
        [device newLibraryWithSource:library_source options:nil error:&error];
    RET_CHECK(library != nil) << "Couldn't create a shader library: "
                              << [[error localizedDescription] UTF8String];

    id<MTLFunction> vertex_function =
        [library newFunctionWithName:@"vertexShader"];
    RET_CHECK(vertex_function != nil)
        << "Failed creating a new vertex function!";

    id<MTLFunction> fragment_function =
        [library newFunctionWithName:@"fragmentShader"];
    RET_CHECK(fragment_function != nil)
        << "Failed creating a new fragment function!";

    MTLRenderPipelineDescriptor* pipelineDescriptor =
        [MTLRenderPipelineDescriptor new];
    pipelineDescriptor.vertexFunction = vertex_function;
    pipelineDescriptor.fragmentFunction = fragment_function;
    pipelineDescriptor.colorAttachments[0].pixelFormat =
        GetPixelFormat(output_format);

    *pipeline_state =
        [device newRenderPipelineStateWithDescriptor:pipelineDescriptor
                                               error:&error];
    RET_CHECK(error == nil) << "Couldn't create a pipeline state: "
                            << [[error localizedDescription] UTF8String];

    return ::mediapipe::OkStatus();
  }

  id<MTLBuffer> positions_buffer_;
  id<MTLBuffer> tex_coords_buffer_;
  id<MTLBuffer> transform_mat_buffer_;
  id<MTLDevice> device_;
  id<MTLRenderPipelineState> pipeline_state_;
  std::array<float, 16> transform_mat_;
  OutputFormat output_format_;
};

class MetalProcessor : public ImageToTensorConverter {
 public:
  ::mediapipe::Status Init(CalculatorContext* cc) {
    metal_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
    RET_CHECK(metal_helper_);
    ASSIGN_OR_RETURN(extractor_,
                     SubRectExtractorMetal::Make(metal_helper_.mtlDevice,
                                                 OutputFormat::kF32C4));
    return ::mediapipe::OkStatus();
  }

  Size GetImageSize(const Packet& image_packet) override {
    const auto& image = image_packet.Get<mediapipe::GpuBuffer>();
    return {image.width(), image.height()};
  }

  ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
                                        const RotatedRect& roi,
                                        const Size& output_dims,
                                        float range_min,
                                        float range_max) override {
    const auto& input = image_packet.Get<mediapipe::GpuBuffer>();
    if (input.format() != mediapipe::GpuBufferFormat::kBGRA32) {
      return InvalidArgumentError(
          absl::StrCat("Only BGRA/RGBA textures are supported, passed "
                       "format: ",
                       static_cast<uint32_t>(input.format())));
    }

    @autoreleasepool {
      id<MTLTexture> texture = [metal_helper_ metalTextureWithGpuBuffer:input];

      constexpr int kNumChannels = 4;
      Tensor tensor(Tensor::ElementType::kFloat32,
                    Tensor::Shape{1, output_dims.height, output_dims.width,
                                  kNumChannels});

      constexpr float kInputImageRangeMin = 0.0f;
      constexpr float kInputImageRangeMax = 1.0f;
      ASSIGN_OR_RETURN(
          auto transform,
          GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
                                      range_min, range_max));

      id<MTLCommandBuffer> command_buffer = [metal_helper_ commandBuffer];
      const auto& buffer_view = tensor.GetMtlBufferWriteView(command_buffer);
      MP_RETURN_IF_ERROR(extractor_->Execute(
          texture, roi,
          /*flip_horizontaly=*/false, transform.scale, transform.offset,
          tflite::gpu::HW(output_dims.height, output_dims.width),
          command_buffer, buffer_view.buffer()));
      [command_buffer commit];
      // TODO: consider removing waitUntilCompleted
      [command_buffer waitUntilCompleted];

      return tensor;
    }
  }

 private:
  MPPMetalHelper* metal_helper_ = nil;
  std::unique_ptr<SubRectExtractorMetal> extractor_;
};

}  // namespace

::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateMetalConverter(CalculatorContext* cc) {
  auto result = absl::make_unique<MetalProcessor>();
  MP_RETURN_IF_ERROR(result->Init(cc));

  // Simply "return std::move(result)" failed to build on macOS with bazel.
  return std::unique_ptr<ImageToTensorConverter>(std::move(result));
}

}  // namespace mediapipe

#endif  // MEDIAPIPE_METAL_ENABLED
40
mediapipe/calculators/tensor/image_to_tensor_converter_metal.h
Normal file
@ -0,0 +1,40 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_METAL_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_METAL_H_

#include "mediapipe/framework/port.h"

#if MEDIAPIPE_METAL_ENABLED

#include <memory>

#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

// Creates a Metal image-to-tensor converter.
// NOTE: [MPPMetalHelper updateContract:...] invocation must precede
// converter creation.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateMetalConverter(CalculatorContext* cc);

}  // namespace mediapipe

#endif  // MEDIAPIPE_METAL_ENABLED

#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_METAL_H_
116
mediapipe/calculators/tensor/image_to_tensor_converter_opencv.cc
Normal file
@ -0,0 +1,116 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"

#include <cmath>
#include <memory>

#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

namespace {

class OpenCvProcessor : public ImageToTensorConverter {
 public:
  Size GetImageSize(const Packet& image_packet) override {
    const auto& image = image_packet.Get<mediapipe::ImageFrame>();
    return {image.Width(), image.Height()};
  }

  ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
                                        const RotatedRect& roi,
                                        const Size& output_dims,
                                        float range_min,
                                        float range_max) override {
    const auto& input = image_packet.Get<mediapipe::ImageFrame>();
    if (input.Format() != mediapipe::ImageFormat::SRGB &&
        input.Format() != mediapipe::ImageFormat::SRGBA) {
      return InvalidArgumentError(
          absl::StrCat("Only RGBA/RGB formats are supported, passed format: ",
                       static_cast<uint32_t>(input.Format())));
    }
    cv::Mat src = mediapipe::formats::MatView(&input);

    constexpr int kNumChannels = 3;
    Tensor tensor(
        Tensor::ElementType::kFloat32,
        Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});
    auto buffer_view = tensor.GetCpuWriteView();
    cv::Mat dst(output_dims.height, output_dims.width, CV_32FC3,
                buffer_view.buffer<float>());

    const cv::RotatedRect rotated_rect(cv::Point2f(roi.center_x, roi.center_y),
                                       cv::Size2f(roi.width, roi.height),
                                       roi.rotation * 180.f / M_PI);
    cv::Mat src_points;
    cv::boxPoints(rotated_rect, src_points);

    const float dst_width = output_dims.width;
    const float dst_height = output_dims.height;
    /* clang-format off */
    float dst_corners[8] = {0.0f,      dst_height,
                            0.0f,      0.0f,
                            dst_width, 0.0f,
                            dst_width, dst_height};
    /* clang-format on */

    cv::Mat dst_points = cv::Mat(4, 2, CV_32F, dst_corners);
    cv::Mat projection_matrix =
        cv::getPerspectiveTransform(src_points, dst_points);
    cv::Mat transformed;
    cv::warpPerspective(src, transformed, projection_matrix,
                        cv::Size(dst_width, dst_height),
                        /*flags=*/cv::INTER_LINEAR,
                        /*borderMode=*/cv::BORDER_REPLICATE);

    if (transformed.channels() > kNumChannels) {
      cv::Mat proper_channels_mat;
      cv::cvtColor(transformed, proper_channels_mat, cv::COLOR_RGBA2RGB);
      transformed = proper_channels_mat;
    }

    constexpr float kInputImageRangeMin = 0.0f;
    constexpr float kInputImageRangeMax = 255.0f;
    ASSIGN_OR_RETURN(
        auto transform,
        GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
                                    range_min, range_max));
    transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset);
    return tensor;
  }
};

}  // namespace

::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateOpenCvConverter(CalculatorContext* cc) {
  // Simply "return absl::make_unique<OpenCvProcessor>()" failed to build on
  // macOS with bazel.
  return std::unique_ptr<ImageToTensorConverter>(
      absl::make_unique<OpenCvProcessor>());
}

}  // namespace mediapipe
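Note the CPU path above maps pixels from [0, 255], while the GPU converters sample normalized values in [0, 1]; GetValueRangeTransformation (defined in image_to_tensor_utils.cc below) hides that difference. As a worked check of the formula: for a model expecting [-1, 1] from 8-bit input, scale = (1 - (-1)) / (255 - 0) = 2/255 and offset = -1 - 0 * scale = -1, so convertTo() computes p * 2/255 - 1 for every pixel p, while the GPU paths get scale = 2 and offset = -1 for the same request.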
32
mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h
Normal file
@ -0,0 +1,32 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_OPENCV_H_

#include <memory>

#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

// Creates an OpenCV image-to-tensor converter.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateOpenCvConverter(CalculatorContext* cc);

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_OPENCV_H_
176
mediapipe/calculators/tensor/image_to_tensor_utils.cc
Normal file
|
@ -0,0 +1,176 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
RotatedRect GetRoi(int input_width, int input_height,
|
||||
absl::optional<mediapipe::NormalizedRect> norm_rect) {
|
||||
if (norm_rect) {
|
||||
return {.center_x = norm_rect->x_center() * input_width,
|
||||
            .center_y = norm_rect->y_center() * input_height,
            .width = norm_rect->width() * input_width,
            .height = norm_rect->height() * input_height,
            .rotation = norm_rect->rotation()};
  }
  return {.center_x = 0.5f * input_width,
          .center_y = 0.5f * input_height,
          .width = static_cast<float>(input_width),
          .height = static_cast<float>(input_height),
          .rotation = 0};
}

::mediapipe::StatusOr<std::array<float, 4>> PadRoi(int input_tensor_width,
                                                   int input_tensor_height,
                                                   bool keep_aspect_ratio,
                                                   RotatedRect* roi) {
  if (!keep_aspect_ratio) {
    return std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f};
  }

  RET_CHECK(input_tensor_width > 0 && input_tensor_height > 0)
      << "Input tensor width and height must be > 0.";
  const float tensor_aspect_ratio =
      static_cast<float>(input_tensor_height) / input_tensor_width;

  RET_CHECK(roi->width > 0 && roi->height > 0)
      << "ROI width and height must be > 0.";
  const float roi_aspect_ratio = roi->height / roi->width;

  float vertical_padding = 0.0f;
  float horizontal_padding = 0.0f;
  float new_width;
  float new_height;
  if (tensor_aspect_ratio > roi_aspect_ratio) {
    new_width = roi->width;
    new_height = roi->width * tensor_aspect_ratio;
    vertical_padding = (1.0f - roi_aspect_ratio / tensor_aspect_ratio) / 2.0f;
  } else {
    new_width = roi->height / tensor_aspect_ratio;
    new_height = roi->height;
    horizontal_padding = (1.0f - tensor_aspect_ratio / roi_aspect_ratio) / 2.0f;
  }

  roi->width = new_width;
  roi->height = new_height;

  return std::array<float, 4>{horizontal_padding, vertical_padding,
                              horizontal_padding, vertical_padding};
}

::mediapipe::StatusOr<ValueTransformation> GetValueRangeTransformation(
    float from_range_min, float from_range_max, float to_range_min,
    float to_range_max) {
  RET_CHECK_LT(from_range_min, from_range_max)
      << "Invalid FROM range: min >= max.";
  RET_CHECK_LT(to_range_min, to_range_max) << "Invalid TO range: min >= max.";
  const float scale =
      (to_range_max - to_range_min) / (from_range_max - from_range_min);
  const float offset = to_range_min - from_range_min * scale;
  return ValueTransformation{scale, offset};
}

void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
                                            int rect_width, int rect_height,
                                            bool flip_horizontaly,
                                            std::array<float, 16>* matrix_ptr) {
  std::array<float, 16>& matrix = *matrix_ptr;
  // The resulting matrix is the product of the matrices described in the
  // comments below:
  //   post_scale_matrix
  //   * translate_matrix
  //   * rotate_matrix
  //   * flip_matrix
  //   * scale_matrix
  //   * initial_translate_matrix

  // Matrix to convert X,Y to [-0.5, 0.5] range "initial_translate_matrix"
  // { 1.0f,  0.0f, 0.0f, -0.5f}
  // { 0.0f,  1.0f, 0.0f, -0.5f}
  // { 0.0f,  0.0f, 1.0f,  0.0f}
  // { 0.0f,  0.0f, 0.0f,  1.0f}

  const float a = sub_rect.width;
  const float b = sub_rect.height;
  // Matrix to scale X,Y,Z to sub rect "scale_matrix"
  // Z has the same scale as X.
  // {   a, 0.0f, 0.0f, 0.0f}
  // {0.0f,    b, 0.0f, 0.0f}
  // {0.0f, 0.0f,    a, 0.0f}
  // {0.0f, 0.0f, 0.0f, 1.0f}

  const float flip = flip_horizontaly ? -1 : 1;
  // Matrix for optional horizontal flip around middle of output image.
  // { fl  , 0.0f, 0.0f, 0.0f}
  // { 0.0f, 1.0f, 0.0f, 0.0f}
  // { 0.0f, 0.0f, 1.0f, 0.0f}
  // { 0.0f, 0.0f, 0.0f, 1.0f}

  const float c = std::cos(sub_rect.rotation);
  const float d = std::sin(sub_rect.rotation);
  // Matrix to do rotation around Z axis "rotate_matrix"
  // {    c,   -d, 0.0f, 0.0f}
  // {    d,    c, 0.0f, 0.0f}
  // { 0.0f, 0.0f, 1.0f, 0.0f}
  // { 0.0f, 0.0f, 0.0f, 1.0f}

  const float e = sub_rect.center_x;
  const float f = sub_rect.center_y;
  // Matrix to do X,Y translation of sub rect within parent rect
  // "translate_matrix"
  // {1.0f, 0.0f, 0.0f, e   }
  // {0.0f, 1.0f, 0.0f, f   }
  // {0.0f, 0.0f, 1.0f, 0.0f}
  // {0.0f, 0.0f, 0.0f, 1.0f}

  const float g = 1.0f / rect_width;
  const float h = 1.0f / rect_height;
  // Matrix to scale X,Y,Z to [0.0, 1.0] range "post_scale_matrix"
  // {g,    0.0f, 0.0f, 0.0f}
  // {0.0f, h,    0.0f, 0.0f}
  // {0.0f, 0.0f,    g, 0.0f}
  // {0.0f, 0.0f, 0.0f, 1.0f}

  // row 1
  matrix[0] = a * c * flip * g;
  matrix[1] = -b * d * g;
  matrix[2] = 0.0f;
  matrix[3] = (-0.5f * a * c * flip + 0.5f * b * d + e) * g;

  // row 2
  matrix[4] = a * d * flip * h;
  matrix[5] = b * c * h;
  matrix[6] = 0.0f;
  matrix[7] = (-0.5f * b * c - 0.5f * a * d * flip + f) * h;

  // row 3
  matrix[8] = 0.0f;
  matrix[9] = 0.0f;
  matrix[10] = a * g;
  matrix[11] = 0.0f;

  // row 4
  matrix[12] = 0.0f;
  matrix[13] = 0.0f;
  matrix[14] = 0.0f;
  matrix[15] = 1.0f;
}

}  // namespace mediapipe
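For reference, the row entries populated above can be cross-checked by expanding the commented product symbolically (a sketch in the code's own notation, where fl is the flip sign):

\[ M = P_{\text{post\_scale}}\, T_{\text{translate}}\, R_{\text{rotate}}\, F_{\text{flip}}\, S_{\text{scale}}\, I_{\text{init\_translate}}, \]
\[ M_{0,0} = a\,c\,\mathrm{fl}\,g, \qquad M_{0,3} = g\Bigl(-\tfrac{1}{2}\,a\,c\,\mathrm{fl} + \tfrac{1}{2}\,b\,d + e\Bigr), \qquad M_{1,3} = h\Bigl(-\tfrac{1}{2}\,b\,c - \tfrac{1}{2}\,a\,d\,\mathrm{fl} + f\Bigr), \]

which agree with matrix[0], matrix[3] and matrix[7] above.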
mediapipe/calculators/tensor/image_to_tensor_utils.h (new file, 82 lines)
@@ -0,0 +1,82 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_

#include <array>

#include "absl/types/optional.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {

struct RotatedRect {
  float center_x;
  float center_y;
  float width;
  float height;
  float rotation;
};

// Generates a new ROI or converts it from normalized rect.
RotatedRect GetRoi(int input_width, int input_height,
                   absl::optional<mediapipe::NormalizedRect> norm_rect);

// Pads ROI, so extraction happens correctly if aspect ratio is to be kept.
// Returns letterbox padding applied.
::mediapipe::StatusOr<std::array<float, 4>> PadRoi(int input_tensor_width,
                                                   int input_tensor_height,
                                                   bool keep_aspect_ratio,
                                                   RotatedRect* roi);

// Represents a transformation of a value that involves scaling and offsetting.
// To apply the transformation:
//   ValueTransformation transform = ...
//   float transformed_value = transform.scale * value + transform.offset;
struct ValueTransformation {
  float scale;
  float offset;
};

// Returns the value transformation to apply to a value in order to convert it
// from the [from_range_min, from_range_max] range into the
// [to_range_min, to_range_max] range.
// from_range_min must be less than from_range_max.
// to_range_min must be less than to_range_max.
::mediapipe::StatusOr<ValueTransformation> GetValueRangeTransformation(
    float from_range_min, float from_range_max, float to_range_min,
    float to_range_max);

// Populates a 4x4 row-major-order transformation matrix that maps (x, y) in
// the range [0, 1] (describing points of @sub_rect)
// to (x', y') in the range [0, 1]*** (describing points of a rect:
// [0, @rect_width] x [0, @rect_height] = RECT).
//
// *** (x', y') will go out of the range for points from @sub_rect
// which are not contained by RECT; this is expected behavior.
//
// @sub_rect - rotated sub rect in absolute coordinates
// @rect_width - rect width
// @rect_height - rect height
// @flip_horizontaly - whether to flip the output buffer horizontally
// @matrix - 4x4 matrix (array of 16 elements) to populate
void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
                                            int rect_width, int rect_height,
                                            bool flip_horizontaly,
                                            std::array<float, 16>* matrix);

}  // namespace mediapipe

#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_
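A minimal usage sketch of the header above (a hypothetical caller, not part of the patch): mapping 8-bit pixel values into the [-1, 1] range expected by many float models.

#include <cstdint>

#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"

float NormalizePixel(uint8_t pixel) {
  // [0, 255] -> [-1, 1]; yields scale = 2/255 and offset = -1.
  const auto transform = mediapipe::GetValueRangeTransformation(
                             /*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
                             /*to_range_min=*/-1.0f, /*to_range_max=*/1.0f)
                             .ValueOrDie();
  return transform.scale * pixel + transform.offset;
}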
mediapipe/calculators/tensor/image_to_tensor_utils_test.cc (new file, 161 lines)
@@ -0,0 +1,161 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"

#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {
namespace {

using ::testing::ElementsAre;
using ::testing::ElementsAreArray;

testing::Matcher<RotatedRect> EqRotatedRect(float width, float height,
                                            float center_x, float center_y,
                                            float rotation) {
  return testing::AllOf(
      testing::Field(&RotatedRect::width, testing::FloatEq(width)),
      testing::Field(&RotatedRect::height, testing::FloatEq(height)),
      testing::Field(&RotatedRect::center_x, testing::FloatEq(center_x)),
      testing::Field(&RotatedRect::center_y, testing::FloatEq(center_y)),
      testing::Field(&RotatedRect::rotation, testing::FloatEq(rotation)));
}

TEST(GetRoi, NoNormRect) {
  EXPECT_THAT(GetRoi(4, 4, {}), EqRotatedRect(4, 4, 2, 2, 0));
  EXPECT_THAT(GetRoi(25, 15, {}), EqRotatedRect(25, 15, 12.5f, 7.5f, 0));
}

TEST(GetRoi, WholeImageNormRect) {
  mediapipe::NormalizedRect norm_rect;
  norm_rect.set_width(1.0f);
  norm_rect.set_height(1.0f);
  norm_rect.set_x_center(0.5f);
  norm_rect.set_y_center(0.5f);
  norm_rect.set_rotation(0.0f);
  EXPECT_THAT(GetRoi(4, 4, norm_rect), EqRotatedRect(4, 4, 2, 2, 0));
  EXPECT_THAT(GetRoi(25, 15, norm_rect), EqRotatedRect(25, 15, 12.5f, 7.5f, 0));
}

TEST(GetRoi, ExpandedNormRect) {
  mediapipe::NormalizedRect norm_rect;
  norm_rect.set_width(4.0f);
  norm_rect.set_height(2.0f);
  norm_rect.set_x_center(0.5f);
  norm_rect.set_y_center(1.0f);
  norm_rect.set_rotation(3.0f);
  EXPECT_THAT(GetRoi(4, 4, norm_rect), EqRotatedRect(16, 8, 2, 4, 3));
  EXPECT_THAT(GetRoi(25, 15, norm_rect), EqRotatedRect(100, 30, 12.5f, 15, 3));
}

TEST(PadRoi, NoPadding) {
  RotatedRect roi{.center_x = 20,
                  .center_y = 10,
                  .width = 100,
                  .height = 200,
                  .rotation = 5};
  auto status_or_value = PadRoi(10, 10, /*keep_aspect_ratio=*/false, &roi);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              ElementsAreArray({0.0f, 0.0f, 0.0f, 0.0f}));
  EXPECT_THAT(roi, EqRotatedRect(100, 200, 20, 10, 5));
}

TEST(PadRoi, HorizontalPadding) {
  RotatedRect roi{.center_x = 20,
                  .center_y = 10,
                  .width = 100,
                  .height = 200,
                  .rotation = 5};
  auto status_or_value = PadRoi(10, 10, /*keep_aspect_ratio=*/true, &roi);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              ElementsAreArray({0.25f, 0.0f, 0.25f, 0.0f}));
  EXPECT_THAT(roi, EqRotatedRect(200, 200, 20, 10, 5));
}

TEST(PadRoi, VerticalPadding) {
  RotatedRect roi{
      .center_x = 1, .center_y = 2, .width = 21, .height = 19, .rotation = 3};
  const float expected_vertical_padding = (21 - 19) / 2.0f / 21;
  auto status_or_value = PadRoi(10, 10, /*keep_aspect_ratio=*/true, &roi);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(
      status_or_value.ValueOrDie(),
      ElementsAre(testing::FloatEq(0.0f),
                  testing::FloatNear(expected_vertical_padding, 1e-6),
                  testing::FloatEq(0.0f),
                  testing::FloatNear(expected_vertical_padding, 1e-6)));
  EXPECT_THAT(roi, EqRotatedRect(21, 21, 1, 2, 3));
}

testing::Matcher<ValueTransformation> EqValueTransformation(float scale,
                                                            float offset) {
  return ::testing::AllOf(
      testing::Field(&ValueTransformation::scale, testing::FloatEq(scale)),
      testing::Field(&ValueTransformation::offset, testing::FloatEq(offset)));
}

TEST(GetValueRangeTransformation, PixelToFloatZeroCenter) {
  auto status_or_value = GetValueRangeTransformation(
      /*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
      /*to_range_min=*/-1.0f, /*to_range_max=*/1.0f);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              EqValueTransformation(/*scale=*/2 / 255.0f,
                                    /*offset=*/-1.0f));
}

TEST(GetValueRangeTransformation, PixelToFloat) {
  auto status_or_value = GetValueRangeTransformation(
      /*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
      /*to_range_min=*/0.0f, /*to_range_max=*/1.0f);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              EqValueTransformation(/*scale=*/1 / 255.0f,
                                    /*offset=*/0.0f));
}

TEST(GetValueRangeTransformation, FloatToFloatNoOp) {
  auto status_or_value = GetValueRangeTransformation(
      /*from_range_min=*/0.0f, /*from_range_max=*/1.0f,
      /*to_range_min=*/0.0f, /*to_range_max=*/1.0f);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              EqValueTransformation(/*scale=*/1.0f, /*offset=*/0.0f));
}

TEST(GetValueRangeTransformation, PixelToPixelNoOp) {
  auto status_or_value = GetValueRangeTransformation(
      /*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
      /*to_range_min=*/0.0f, /*to_range_max=*/255.0f);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              EqValueTransformation(/*scale=*/1.0f, /*offset=*/0.0f));
}

TEST(GetValueRangeTransformation, FloatToPixel) {
  auto status_or_value = GetValueRangeTransformation(
      /*from_range_min=*/0.0f, /*from_range_max=*/1.0f,
      /*to_range_min=*/0.0f, /*to_range_max=*/255.0f);
  MP_ASSERT_OK(status_or_value);
  EXPECT_THAT(status_or_value.ValueOrDie(),
              EqValueTransformation(/*scale=*/255.0f, /*offset=*/0.0f));
}

}  // namespace
}  // namespace mediapipe
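For intuition, the letterbox values asserted in TEST(PadRoi, HorizontalPadding) above work out as follows (10x10 tensor, 100x200 ROI):

\[ r_{\text{tensor}} = \frac{10}{10} = 1, \quad r_{\text{roi}} = \frac{200}{100} = 2, \quad \text{padding}_{\text{horizontal}} = \frac{1 - r_{\text{tensor}}/r_{\text{roi}}}{2} = \frac{1 - 1/2}{2} = 0.25, \]

and the new width is \(200 / r_{\text{tensor}} = 200\), so the padded ROI becomes 200x200 as the test expects.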
mediapipe/calculators/tensor/inference_calculator.cc (new file, 832 lines)
@@ -0,0 +1,832 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstring>
#include <memory>
#include <string>
#include <vector>

#include "absl/memory/memory.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/tflite/config.h"

#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
#include "mediapipe/util/cpu_util.h"
#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__

#include "mediapipe/util/resource_util.h"
#include "tensorflow/lite/error_reporter.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

#if defined(MEDIAPIPE_ANDROID)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/filesystem.h"
#include "mediapipe/util/android/file/base/helpers.h"
#endif  // ANDROID

#if MEDIAPIPE_TFLITE_GL_INFERENCE
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/util/tflite/tflite_gpu_runner.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE

#if MEDIAPIPE_TFLITE_METAL_INFERENCE
#import <CoreVideo/CoreVideo.h>
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>

#import "mediapipe/gpu/MPPMetalHelper.h"
#include "mediapipe/gpu/MPPMetalUtil.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/metal/buffer_convert.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
#endif  // MEDIAPIPE_TFLITE_METAL_INFERENCE

#if !defined(MEDIAPIPE_EDGE_TPU)
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#endif  // !EDGETPU
#if defined(MEDIAPIPE_ANDROID)
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#endif  // ANDROID

namespace {
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size) {  // NOLINT
  return (size + group_size - 1) / group_size;
}

// Round up n to next multiple of m.
template <typename T>
T RoundUp(T n, T m) {
  return ((n + m - T{1}) / m) * m;
}

bool ShouldUseGpu(const mediapipe::InferenceCalculatorOptions& options) {
  return (
      !options.has_delegate() ||  // Use GPU delegate if delegate not specified
      (options.has_delegate() && options.delegate().has_gpu()));
}

constexpr char kTensorsTag[] = "TENSORS";
}  // namespace

#if defined(MEDIAPIPE_EDGE_TPU)
#include "edgetpu.h"

// Creates and returns an Edge TPU interpreter to run the given edgetpu model.
std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
    const tflite::FlatBufferModel& model,
    tflite::ops::builtin::BuiltinOpResolver* resolver,
    edgetpu::EdgeTpuContext* edgetpu_context) {
  resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (tflite::InterpreterBuilder(model, *resolver)(&interpreter) != kTfLiteOk) {
    std::cerr << "Failed to build edge TPU interpreter." << std::endl;
  }
  interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
  interpreter->SetNumThreads(1);
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    std::cerr << "Failed to allocate edge TPU tensors." << std::endl;
  }
  return interpreter;
}
#endif  // MEDIAPIPE_EDGE_TPU

namespace mediapipe {

#if MEDIAPIPE_TFLITE_METAL_INFERENCE
namespace {
tflite::gpu::BHWC BhwcFromTensorShape(const Tensor::Shape& shape) {
  tflite::gpu::BHWC result;
  result.b = shape.dims[0];
  switch (shape.dims.size()) {
    case 1:
      // result.b is already filled.
      break;
    case 2:
      result.h = 1;
      result.w = 1;
      result.c = shape.dims[1];
      break;
    case 3:
      result.h = 1;
      result.w = shape.dims[1];
      result.c = shape.dims[2];
      break;
    case 4:
      result.h = shape.dims[1];
      result.w = shape.dims[2];
      result.c = shape.dims[3];
      break;
    default:
      // Handles 0 and >4.
      LOG(FATAL)
          << "Dimensions size must be in range [1,4] for GPU inference, but "
          << shape.dims.size() << " is provided";
  }
  return result;
}
}  // namespace
#endif  // MEDIAPIPE_TFLITE_METAL_INFERENCE

// Returns the number of threads to configure the XNNPACK delegate with.
// (Equal to the user-provided value if specified. Otherwise, returns the
// number of high cores; hard-coded to 1 for Emscripten without the Threads
// extension.)
int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) {
  static constexpr int kDefaultNumThreads = -1;
  if (opts.has_delegate() && opts.delegate().has_xnnpack() &&
      opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
    return opts.delegate().xnnpack().num_threads();
  }
#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
  return InferHigherCoreIds().size();
#else
  return 1;
#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
}

// Calculator Header Section

// Runs inference on the provided input Tensors and TFLite model.
//
// Creates an interpreter with the given model and calls invoke().
// Optionally runs inference on CPU/GPU.
//
// This calculator can be used with TensorConverterCalculator to get the
// appropriate inputs.
//
// When the input tensors are on CPU, GPU inference is optional and can be
// specified in the calculator options.
// When the input tensors are on GPU, inference is GPU and output can be CPU or
// GPU.
//
// Input:
//  TENSORS - Vector of Tensors
//
// Output:
//  TENSORS - Vector of Tensors
//
// Input side packet:
//  CUSTOM_OP_RESOLVER (optional) - Use a custom op resolver,
//                                  instead of the builtin one.
//  MODEL (optional) - Use to specify TfLite model
//                     (std::unique_ptr<tflite::FlatBufferModel,
//                       std::function<void(tflite::FlatBufferModel*)>>)
//
// Example use:
// node {
//   calculator: "InferenceCalculator"
//   input_stream: "TENSORS:tensor_image"
//   output_stream: "TENSORS:tensors"
//   options: {
//     [mediapipe.InferenceCalculatorOptions.ext] {
//       model_path: "modelname.tflite"
//     }
//   }
// }
//
// or
//
// node {
//   calculator: "InferenceCalculator"
//   input_stream: "TENSORS:tensor_image"
//   input_side_packet: "MODEL:model"
//   output_stream: "TENSORS:tensors"
//   options: {
//     [mediapipe.InferenceCalculatorOptions.ext] {
//       model_path: "modelname.tflite"
//       delegate { gpu {} }
//     }
//   }
// }
//
// IMPORTANT Notes:
//  Tensors are assumed to be ordered correctly (sequentially added to model).
//  Input tensors are assumed to be of the correct size and already normalized.

class InferenceCalculator : public CalculatorBase {
 public:
  using TfLiteDelegatePtr =
      std::unique_ptr<TfLiteDelegate, std::function<void(TfLiteDelegate*)>>;
  using TfLiteModelPtr =
      std::unique_ptr<tflite::FlatBufferModel,
                      std::function<void(tflite::FlatBufferModel*)>>;

  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;
  ::mediapipe::Status Close(CalculatorContext* cc) override;

 private:
  ::mediapipe::Status ReadKernelsFromFile();
  ::mediapipe::Status WriteKernelsToFile();
  ::mediapipe::Status LoadModel(CalculatorContext* cc);
  ::mediapipe::StatusOr<Packet> GetModelAsPacket(const CalculatorContext& cc);
  ::mediapipe::Status LoadDelegate(CalculatorContext* cc);
  ::mediapipe::Status InitTFLiteGPURunner(CalculatorContext* cc);

  Packet model_packet_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  TfLiteDelegatePtr delegate_;

#if MEDIAPIPE_TFLITE_GL_INFERENCE
  mediapipe::GlCalculatorHelper gpu_helper_;
  std::unique_ptr<tflite::gpu::TFLiteGPURunner> tflite_gpu_runner_;
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
  MPPMetalHelper* gpu_helper_ = nullptr;
  TFLBufferConvert* converter_to_BPHWC4_ = nil;
  TFLBufferConvert* converter_from_BPHWC4_ = nil;
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE

#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
  std::vector<Tensor::Shape> output_shapes_;
  std::vector<std::unique_ptr<Tensor>> gpu_buffers_in_;
  std::vector<std::unique_ptr<Tensor>> gpu_buffers_out_;
#endif  // MEDIAPIPE_TFLITE_GPU_SUPPORTED

#if defined(MEDIAPIPE_EDGE_TPU)
  std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_ =
      edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
#endif

  bool use_advanced_gpu_api_ = false;
  bool use_gpu_delegate_ = false;

  bool use_kernel_caching_ = false;
  std::string cached_kernel_filename_;
};
REGISTER_CALCULATOR(InferenceCalculator);

::mediapipe::Status InferenceCalculator::GetContract(CalculatorContract* cc) {
  RET_CHECK(cc->Inputs().HasTag(kTensorsTag));
  cc->Inputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();
  RET_CHECK(cc->Outputs().HasTag(kTensorsTag));
  cc->Outputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();

  const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
  RET_CHECK(!options.model_path().empty() ^
            cc->InputSidePackets().HasTag("MODEL"))
      << "Either model as side packet or model path in options is required.";

  if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
    cc->InputSidePackets()
        .Tag("CUSTOM_OP_RESOLVER")
        .Set<tflite::ops::builtin::BuiltinOpResolver>();
  }
  if (cc->InputSidePackets().HasTag("MODEL")) {
    cc->InputSidePackets().Tag("MODEL").Set<TfLiteModelPtr>();
  }

  if (ShouldUseGpu(options)) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
    MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
    MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#endif
  }
  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

#if MEDIAPIPE_TFLITE_GL_INFERENCE || MEDIAPIPE_TFLITE_METAL_INFERENCE
  const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
  if (ShouldUseGpu(options)) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
    use_advanced_gpu_api_ = options.has_delegate() &&
                            options.delegate().has_gpu() &&
                            options.delegate().gpu().use_advanced_gpu_api();
    use_kernel_caching_ =
        use_advanced_gpu_api_ && options.delegate().gpu().use_kernel_caching();
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE
    use_gpu_delegate_ = !use_advanced_gpu_api_;
  }
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE || MEDIAPIPE_TFLITE_METAL_INFERENCE

  if (use_kernel_caching_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
    cached_kernel_filename_ =
        "/sdcard/" + mediapipe::File::Basename(options.model_path()) + ".ker";
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID
  }

  // When use_advanced_gpu_api_ is set, model loading is handled entirely in
  // InitTFLiteGPURunner.
  if (!use_advanced_gpu_api_) {
    MP_RETURN_IF_ERROR(LoadModel(cc));
  }

  if (use_gpu_delegate_ || use_advanced_gpu_api_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
    MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
    MP_RETURN_IF_ERROR(
        gpu_helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
          return use_advanced_gpu_api_ ? InitTFLiteGPURunner(cc)
                                       : LoadDelegate(cc);
        }));
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
    gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
    RET_CHECK(gpu_helper_);
    MP_RETURN_IF_ERROR(LoadDelegate(cc));
#endif
  } else {
    MP_RETURN_IF_ERROR(LoadDelegate(cc));
  }
  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::Process(CalculatorContext* cc) {
  if (cc->Inputs().Tag(kTensorsTag).IsEmpty()) {
    return ::mediapipe::OkStatus();
  }
  const auto& input_tensors =
      cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
  RET_CHECK(!input_tensors.empty());
  auto output_tensors = absl::make_unique<std::vector<Tensor>>();

  if (use_gpu_delegate_ || use_advanced_gpu_api_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
    if (use_advanced_gpu_api_) {
      MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
          [this, &input_tensors, &output_tensors]() -> ::mediapipe::Status {
            for (int i = 0; i < input_tensors.size(); ++i) {
              MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToInputTensor(
                  input_tensors[i].GetOpenGlBufferReadView().name(), i));
            }
            output_tensors->reserve(output_shapes_.size());
            for (int i = 0; i < output_shapes_.size(); ++i) {
              output_tensors->emplace_back(Tensor::ElementType::kFloat32,
                                           output_shapes_[i]);
              MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToOutputTensor(
                  output_tensors->back().GetOpenGlBufferWriteView().name(), i));
            }
            return ::mediapipe::OkStatus();
          }));
    } else {
      MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
          [this, &input_tensors]() -> ::mediapipe::Status {
            // Explicitly copy input.
            for (int i = 0; i < input_tensors.size(); ++i) {
              glBindBuffer(GL_COPY_READ_BUFFER,
                           input_tensors[i].GetOpenGlBufferReadView().name());
              glBindBuffer(
                  GL_COPY_WRITE_BUFFER,
                  gpu_buffers_in_[i]->GetOpenGlBufferWriteView().name());
              glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0,
                                  0, input_tensors[i].bytes());
            }
            return ::mediapipe::OkStatus();
          }));
    }
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
    // Explicitly copy input, converting 32-bit floats to 16-bit.
    id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
    command_buffer.label = @"InferenceCalculatorConvert";
    id<MTLComputeCommandEncoder> compute_encoder =
        [command_buffer computeCommandEncoder];
    for (int i = 0; i < input_tensors.size(); ++i) {
      auto input_view = input_tensors[i].GetMtlBufferReadView(command_buffer);
      // Reshape tensor.
      tflite::gpu::BHWC shape = BhwcFromTensorShape(input_tensors[i].shape());
      auto gpu_buffer_view =
          gpu_buffers_in_[i]->GetMtlBufferWriteView(command_buffer);
      [converter_to_BPHWC4_ convertWithEncoder:compute_encoder
                                         shape:shape
                                  sourceBuffer:input_view.buffer()
                               convertedBuffer:gpu_buffer_view.buffer()];
    }
    [compute_encoder endEncoding];
    [command_buffer commit];
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE
  } else {
    // Read CPU input into tensors.
    for (int i = 0; i < input_tensors.size(); ++i) {
      const Tensor* input_tensor = &input_tensors[i];
      auto input_tensor_view = input_tensor->GetCpuReadView();
      auto input_tensor_buffer = input_tensor_view.buffer<float>();
      float* local_tensor_buffer = interpreter_->typed_input_tensor<float>(i);
      std::memcpy(local_tensor_buffer, input_tensor_buffer,
                  input_tensor->bytes());
    }
  }

  // Run inference.
#if MEDIAPIPE_TFLITE_GL_INFERENCE
  if (use_advanced_gpu_api_) {
    RET_CHECK(tflite_gpu_runner_->Invoke().ok());
  } else {
    RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);
  }
#else
  RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE

  if (use_gpu_delegate_ || use_advanced_gpu_api_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
    if (use_gpu_delegate_) {
      MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
          [this, &output_tensors]() -> ::mediapipe::Status {
            output_tensors->reserve(output_shapes_.size());
            for (int i = 0; i < output_shapes_.size(); ++i) {
              const auto& t = gpu_buffers_out_[i];
              output_tensors->emplace_back(Tensor::ElementType::kFloat32,
                                           gpu_buffers_out_[i]->shape());
              auto read_view = t->GetOpenGlBufferReadView();
              glBindBuffer(GL_COPY_READ_BUFFER, read_view.name());
              auto write_view =
                  output_tensors->back().GetOpenGlBufferWriteView();
              glBindBuffer(GL_COPY_WRITE_BUFFER, write_view.name());
              glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0,
                                  0, t->bytes());
            }
            return ::mediapipe::OkStatus();
          }));
    }
    // Output tensors are already bound if use_advanced_gpu_api_ is true.
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
    id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
    command_buffer.label = @"InferenceBPHWC4Convert";
    id<MTLComputeCommandEncoder> convert_command =
        [command_buffer computeCommandEncoder];
    output_tensors->reserve(output_shapes_.size());
    for (int i = 0; i < output_shapes_.size(); ++i) {
      output_tensors->emplace_back(Tensor::ElementType::kFloat32,
                                   output_shapes_[i]);
      // Reshape tensor.
      tflite::gpu::BHWC shape = BhwcFromTensorShape(output_shapes_[i]);
      auto read_view =
          gpu_buffers_out_[i]->GetMtlBufferReadView(command_buffer);
      auto write_view =
          output_tensors->at(i).GetMtlBufferWriteView(command_buffer);
      [converter_from_BPHWC4_ convertWithEncoder:convert_command
                                           shape:shape
                                    sourceBuffer:read_view.buffer()
                                 convertedBuffer:write_view.buffer()];
    }
    [convert_command endEncoding];
    [command_buffer commit];
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE
  } else {
    // Output result tensors (CPU).
    const auto& tensor_indexes = interpreter_->outputs();
    output_tensors->reserve(tensor_indexes.size());
    for (int i = 0; i < tensor_indexes.size(); ++i) {
      TfLiteTensor* tensor = interpreter_->tensor(tensor_indexes[i]);
      output_tensors->emplace_back(
          Tensor::ElementType::kFloat32,
          Tensor::Shape{std::vector<int>{
              tensor->dims->data, tensor->dims->data + tensor->dims->size}});
      auto cpu_view = output_tensors->back().GetCpuWriteView();
      std::memcpy(cpu_view.buffer<float>(), tensor->data.f,
                  output_tensors->back().bytes());
    }
  }
  cc->Outputs()
      .Tag(kTensorsTag)
      .Add(output_tensors.release(), cc->InputTimestamp());
  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::WriteKernelsToFile() {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
  if (use_kernel_caching_) {
    // Save kernel file.
    auto kernel_cache = absl::make_unique<std::vector<uint8_t>>(
        tflite_gpu_runner_->GetSerializedBinaryCache());
    std::string cache_str(kernel_cache->begin(), kernel_cache->end());
    MP_RETURN_IF_ERROR(
        mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
  }
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID
  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::Close(CalculatorContext* cc) {
  MP_RETURN_IF_ERROR(WriteKernelsToFile());
#if MEDIAPIPE_TFLITE_GL_INFERENCE
  if (use_gpu_delegate_) {
    MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
      gpu_buffers_in_.clear();
      gpu_buffers_out_.clear();
      return ::mediapipe::OkStatus();
    }));
  }
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
  converter_to_BPHWC4_ = nil;
  converter_from_BPHWC4_ = nil;
  gpu_buffers_in_.clear();
  gpu_buffers_out_.clear();
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE

#if defined(MEDIAPIPE_EDGE_TPU)
  edgetpu_context_.reset();
#endif
  interpreter_ = nullptr;
  delegate_ = nullptr;
  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::ReadKernelsFromFile() {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
  if (use_kernel_caching_) {
    // Load the pre-compiled kernel file.
    if (mediapipe::File::Exists(cached_kernel_filename_)) {
      std::string cache_str;
      MP_RETURN_IF_ERROR(
          mediapipe::file::GetContents(cached_kernel_filename_, &cache_str));
      std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end());
      tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec));
    }
  }
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID
  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::InitTFLiteGPURunner(
    CalculatorContext* cc) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
  ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
  const auto& model = *model_packet_.Get<TfLiteModelPtr>();
  tflite::ops::builtin::BuiltinOpResolver op_resolver;
  if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
    op_resolver = cc->InputSidePackets()
                      .Tag("CUSTOM_OP_RESOLVER")
                      .Get<tflite::ops::builtin::BuiltinOpResolver>();
  }

  // Create runner.
  tflite::gpu::InferenceOptions options;
  options.priority1 = tflite::gpu::InferencePriority::MIN_LATENCY;
  options.priority2 = tflite::gpu::InferencePriority::AUTO;
  options.priority3 = tflite::gpu::InferencePriority::AUTO;
  options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
  tflite_gpu_runner_ = std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
  MP_RETURN_IF_ERROR(
      tflite_gpu_runner_->InitializeWithModel(model, op_resolver));

  // Create and bind OpenGL buffers for outputs.
  // The buffers are created once and their IDs are passed to the calculator
  // outputs.
  output_shapes_.resize(tflite_gpu_runner_->outputs_size());
  for (int i = 0; i < tflite_gpu_runner_->outputs_size(); ++i) {
    output_shapes_[i] = {tflite_gpu_runner_->GetOutputShapes()[i].b,
                         tflite_gpu_runner_->GetOutputShapes()[i].h,
                         tflite_gpu_runner_->GetOutputShapes()[i].w,
                         tflite_gpu_runner_->GetOutputShapes()[i].c};
  }

  MP_RETURN_IF_ERROR(ReadKernelsFromFile());

  MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE

  return ::mediapipe::OkStatus();
}

::mediapipe::Status InferenceCalculator::LoadModel(CalculatorContext* cc) {
  ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
  const auto& model = *model_packet_.Get<TfLiteModelPtr>();
  tflite::ops::builtin::BuiltinOpResolver op_resolver;
  if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
    op_resolver = cc->InputSidePackets()
                      .Tag("CUSTOM_OP_RESOLVER")
                      .Get<tflite::ops::builtin::BuiltinOpResolver>();
  }

#if defined(MEDIAPIPE_EDGE_TPU)
  interpreter_ =
      BuildEdgeTpuInterpreter(model, &op_resolver, edgetpu_context_.get());
#else
  tflite::InterpreterBuilder(model, op_resolver)(&interpreter_);
#endif  // MEDIAPIPE_EDGE_TPU
  RET_CHECK(interpreter_);

#if defined(__EMSCRIPTEN__) || defined(MEDIAPIPE_EDGE_TPU)
  interpreter_->SetNumThreads(1);
#else
  interpreter_->SetNumThreads(
      cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
#endif  // __EMSCRIPTEN__ || MEDIAPIPE_EDGE_TPU

  RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  // TODO: Support quantized tensors.
  CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type !=
        kTfLiteAffineQuantization);

  return ::mediapipe::OkStatus();
}

::mediapipe::StatusOr<Packet> InferenceCalculator::GetModelAsPacket(
    const CalculatorContext& cc) {
  const auto& options = cc.Options<mediapipe::InferenceCalculatorOptions>();
  if (!options.model_path().empty()) {
    std::string model_path = options.model_path();

    ASSIGN_OR_RETURN(model_path, mediapipe::PathToResourceAsFile(model_path));

    auto model = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
    RET_CHECK(model) << "Failed to load model from path.";
    return MakePacket<TfLiteModelPtr>(TfLiteModelPtr(
        model.release(), [](tflite::FlatBufferModel* model) { delete model; }));
  }
  if (cc.InputSidePackets().HasTag("MODEL")) {
    return cc.InputSidePackets().Tag("MODEL");
  }
  return ::mediapipe::Status(
      ::mediapipe::StatusCode::kNotFound,
      "Must specify TFLite model as path or loaded model.");
}

::mediapipe::Status InferenceCalculator::LoadDelegate(CalculatorContext* cc) {
  const auto& calculator_opts =
      cc->Options<mediapipe::InferenceCalculatorOptions>();
  if (calculator_opts.has_delegate() &&
      calculator_opts.delegate().has_tflite()) {
    // Default tflite inference requested - no need to modify the graph.
    return ::mediapipe::OkStatus();
  }

  if (!use_gpu_delegate_) {
#if defined(MEDIAPIPE_ANDROID)
    const bool nnapi_requested = calculator_opts.has_delegate()
                                     ? calculator_opts.delegate().has_nnapi()
                                     : calculator_opts.use_nnapi();
    if (nnapi_requested) {
      // Attempt to use NNAPI.
      // If not supported, the default CPU delegate will be created and used.
      interpreter_->SetAllowFp16PrecisionForFp32(1);
      delegate_ =
          TfLiteDelegatePtr(tflite::NnApiDelegate(), [](TfLiteDelegate*) {
            // No need to free according to tflite::NnApiDelegate()
            // documentation.
          });
      RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
                   kTfLiteOk);
      return ::mediapipe::OkStatus();
    }
#endif  // MEDIAPIPE_ANDROID

#if defined(__EMSCRIPTEN__)
    const bool xnnpack_requested = true;
#else
    const bool xnnpack_requested = calculator_opts.has_delegate() &&
                                   calculator_opts.delegate().has_xnnpack();
#endif  // __EMSCRIPTEN__

#if !defined(MEDIAPIPE_EDGE_TPU)
    if (xnnpack_requested) {
      TfLiteXNNPackDelegateOptions xnnpack_opts{};
      xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
      delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
                                    &TfLiteXNNPackDelegateDelete);
      RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
                   kTfLiteOk);
    }
#endif  // !EDGETPU

    // Return early; no need for the GPU delegate below.
    return ::mediapipe::OkStatus();
  } else {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
    // Configure and create the delegate.
    TfLiteGpuDelegateOptions options = TfLiteGpuDelegateOptionsDefault();
    options.compile_options.precision_loss_allowed = 1;
    options.compile_options.preferred_gl_object_type =
        TFLITE_GL_OBJECT_TYPE_FASTEST;
    options.compile_options.dynamic_batch_enabled = 0;
    options.compile_options.inline_parameters = 1;
    delegate_ = TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options),
                                  &TfLiteGpuDelegateDelete);

    // Get input image sizes.
    const auto& input_indices = interpreter_->inputs();
    for (int i = 0; i < input_indices.size(); ++i) {
      const TfLiteTensor* tensor = interpreter_->tensor(input_indices[i]);
      gpu_buffers_in_.emplace_back(absl::make_unique<Tensor>(
          Tensor::ElementType::kFloat32,
          Tensor::Shape{std::vector<int>{
              tensor->dims->data, tensor->dims->data + tensor->dims->size}}));
      RET_CHECK_EQ(
          TfLiteGpuDelegateBindBufferToTensor(
              delegate_.get(),
              gpu_buffers_in_.back()->GetOpenGlBufferWriteView().name(),
              interpreter_->inputs()[i]),
          kTfLiteOk);
    }
    interpreter_->SetAllowBufferHandleOutput(true);
    // Get output image sizes.
    const auto& output_indices = interpreter_->outputs();
    output_shapes_.resize(output_indices.size());
    // Create and bind output buffers.
    for (int i = 0; i < output_shapes_.size(); ++i) {
      const TfLiteTensor* tensor = interpreter_->tensor(output_indices[i]);
      gpu_buffers_out_.emplace_back(absl::make_unique<Tensor>(
          Tensor::ElementType::kFloat32,
          Tensor::Shape{std::vector<int>{
              tensor->dims->data, tensor->dims->data + tensor->dims->size}}));
      RET_CHECK_EQ(
          TfLiteGpuDelegateBindBufferToTensor(
              delegate_.get(),
              gpu_buffers_out_.back()->GetOpenGlBufferWriteView().name(),
              output_indices[i]),
          kTfLiteOk);
    }

    // Must call this last.
    RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
                 kTfLiteOk);
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
    // Configure and create the delegate.
    TFLGpuDelegateOptions options;
    options.allow_precision_loss = true;
    options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
    delegate_ = TfLiteDelegatePtr(TFLGpuDelegateCreate(&options),
                                  &TFLGpuDelegateDelete);
    RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
                 kTfLiteOk);
    id<MTLDevice> device = gpu_helper_.mtlDevice;

    // Get input image sizes.
    const auto& input_indices = interpreter_->inputs();
    for (int i = 0; i < input_indices.size(); ++i) {
      const TfLiteTensor* tensor = interpreter_->tensor(input_indices[i]);
      // Create and bind input buffer.
      std::vector<int> dims{tensor->dims->data,
                            tensor->dims->data + tensor->dims->size};
      dims.back() = RoundUp(dims.back(), 4);
      gpu_buffers_in_.emplace_back(absl::make_unique<Tensor>(
          Tensor::ElementType::kFloat16, Tensor::Shape{dims}));
      auto buffer_view =
          gpu_buffers_in_[i]->GetMtlBufferWriteView(gpu_helper_.mtlDevice);
      RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
                       delegate_.get(), input_indices[i], buffer_view.buffer()),
                   true);
    }

    interpreter_->SetAllowBufferHandleOutput(true);
    // Get output image sizes.
    const auto& output_indices = interpreter_->outputs();
    output_shapes_.resize(output_indices.size());
    for (int i = 0; i < output_shapes_.size(); ++i) {
      const TfLiteTensor* tensor = interpreter_->tensor(output_indices[i]);
      RET_CHECK(tensor->dims->size <= 4);
      // Create and bind output buffers.
      // Channels are always padded to a multiple of 4.
      std::vector<int> dims{tensor->dims->data,
                            tensor->dims->data + tensor->dims->size};
      output_shapes_[i] = {dims};
      dims.back() = RoundUp(dims.back(), 4);
      gpu_buffers_out_.emplace_back(absl::make_unique<Tensor>(
          Tensor::ElementType::kFloat16, Tensor::Shape{dims}));
      RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
                       delegate_.get(), output_indices[i],
                       gpu_buffers_out_[i]
                           ->GetMtlBufferWriteView(gpu_helper_.mtlDevice)
                           .buffer()),
                   true);
    }

    // Create converter for GPU input.
    converter_to_BPHWC4_ = [[TFLBufferConvert alloc] initWithDevice:device
                                                          isFloat16:true
                                                    convertToPBHWC4:true];
    if (converter_to_BPHWC4_ == nil) {
      return mediapipe::InternalError(
          "Error initializing input buffer converter");
    }
    // Create converter for GPU output.
    converter_from_BPHWC4_ = [[TFLBufferConvert alloc] initWithDevice:device
                                                            isFloat16:true
                                                      convertToPBHWC4:false];
    if (converter_from_BPHWC4_ == nil) {
      return mediapipe::InternalError(
          "Error initializing output buffer converter");
    }
#endif  // MEDIAPIPE_TFLITE_GL_INFERENCE
  }

  return ::mediapipe::OkStatus();
}

}  // namespace mediapipe
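The MODEL side packet consumed above can also be prepared by application code; the following sketch mirrors GetModelAsPacket (LoadModelPacket is a hypothetical helper, not part of the patch):

#include <functional>
#include <memory>
#include <string>

#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/logging.h"
#include "tensorflow/lite/model.h"

using TfLiteModelPtr =
    std::unique_ptr<tflite::FlatBufferModel,
                    std::function<void(tflite::FlatBufferModel*)>>;

mediapipe::Packet LoadModelPacket(const std::string& path) {
  auto model = tflite::FlatBufferModel::BuildFromFile(path.c_str());
  CHECK(model) << "Failed to load model from " << path;
  // Wrap in the deleter-carrying pointer type InferenceCalculator expects.
  return mediapipe::MakePacket<TfLiteModelPtr>(TfLiteModelPtr(
      model.release(), [](tflite::FlatBufferModel* m) { delete m; }));
}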
mediapipe/calculators/tensor/inference_calculator.proto (new file, 111 lines)
@@ -0,0 +1,111 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

// Full Example:
//
// node {
//   calculator: "InferenceCalculator"
//   input_stream: "TENSOR_IN:image_tensors"
//   output_stream: "TENSOR_OUT:result_tensors"
//   options {
//     [mediapipe.InferenceCalculatorOptions.ext] {
//       model_path: "model.tflite"
//       delegate { gpu {} }
//     }
//   }
// }
//
message InferenceCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional InferenceCalculatorOptions ext = 336783863;
  }

  message Delegate {
    // Default inference provided by tflite.
    message TfLite {}
    // Delegate to run GPU inference depending on the device.
    // (Can use OpenGL, OpenCL, or Metal depending on the device.)
    message Gpu {
      // Experimental, Android/Linux only. Use TFLite GPU delegate API2 for
      // the NN inference.
      // example:
      //   delegate: { gpu { use_advanced_gpu_api: true } }
      optional bool use_advanced_gpu_api = 1 [default = false];

      // This option is valid for TFLite GPU delegate API2 only.
      // Set any of the available APIs to force inference to run with it.
      enum API {
        ANY = 0;
        OPENGL = 1;
        OPENCL = 2;
      }
      optional API api = 4 [default = ANY];

      // This option is valid for TFLite GPU delegate API2 only.
      // Set to true to use 16-bit float precision. If max precision is needed,
      // set to false for 32-bit float calculations only.
      optional bool allow_precision_loss = 3 [default = true];

      // Load pre-compiled serialized binary cache to accelerate init process.
      // Only available for OpenCL delegate on Android.
      optional bool use_kernel_caching = 2 [default = false];
    }
    // Android only.
    message Nnapi {}
    message Xnnpack {
      // Number of threads for XNNPACK delegate. (By default, the calculator
      // tries to choose an optimal number of threads depending on the device.)
      optional int32 num_threads = 1 [default = -1];
    }

    oneof delegate {
      TfLite tflite = 1;
      Gpu gpu = 2;
      Nnapi nnapi = 3;
      Xnnpack xnnpack = 4;
    }
  }

  // Path to the TF Lite model (ex: /path/to/modelname.tflite).
  // On mobile, this is generally just modelname.tflite.
  optional string model_path = 1;

  // Whether the TF Lite GPU or CPU backend should be used. Effective only when
  // input tensors are on CPU. For input tensors on GPU, the GPU backend is
  // always used.
  // DEPRECATED: configure "delegate" instead.
  optional bool use_gpu = 2 [deprecated = true, default = false];

  // Android only. When true, an NNAPI delegate will be used for inference.
  // If NNAPI is not available, then the default CPU delegate will be used
  // automatically.
  // DEPRECATED: configure "delegate" instead.
  optional bool use_nnapi = 3 [deprecated = true, default = false];

  // The number of threads available to the interpreter. Effective only when
  // input tensors are on CPU and 'use_gpu' is false.
  optional int32 cpu_num_thread = 4 [default = -1];

  // TfLite delegate to run inference.
  // NOTE: calculator is free to choose delegate if not specified explicitly.
  // NOTE: use_gpu/use_nnapi are ignored if specified. (Delegate takes
  //       precedence over the deprecated use_* options.)
  optional Delegate delegate = 5;
}
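As a small illustration of the delegate semantics documented above (a minimal sketch, assuming the generated C++ proto API): once any delegate member is set, it takes precedence over the deprecated use_* flags.

#include "mediapipe/calculators/tensor/inference_calculator.pb.h"

mediapipe::InferenceCalculatorOptions MakeXnnpackOptions(int num_threads) {
  mediapipe::InferenceCalculatorOptions opts;
  opts.set_model_path("model.tflite");
  opts.set_use_gpu(true);  // Ignored: the delegate set below takes precedence.
  opts.mutable_delegate()->mutable_xnnpack()->set_num_threads(num_threads);
  return opts;
}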
mediapipe/calculators/tensor/inference_calculator_test.cc (new file, 162 lines)
@@ -0,0 +1,162 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <vector>

#include "absl/strings/str_replace.h"
#include "absl/strings/string_view.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"  // NOLINT
#include "mediapipe/framework/tool/validate_type.h"
#include "tensorflow/lite/error_reporter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

#ifdef __APPLE__
#include <CoreFoundation/CoreFoundation.h>
#endif  // defined(__APPLE__)

namespace mediapipe {

using ::tflite::Interpreter;

void DoSmokeTest(const std::string& graph_proto) {
  const int width = 8;
  const int height = 8;
  const int channels = 3;
  // Prepare input tensor.
  auto input_vec = absl::make_unique<std::vector<Tensor>>();
  input_vec->emplace_back(Tensor::ElementType::kFloat32,
                          Tensor::Shape{1, height, width, channels});
  {
    auto view1 = input_vec->back().GetCpuWriteView();
    auto tensor_buffer = view1.buffer<float>();
    ASSERT_NE(tensor_buffer, nullptr);
    for (int i = 0; i < width * height * channels - 1; i++) {
      tensor_buffer[i] = 1;
    }
  }

  // Prepare a single-calculator graph and wait for its output packets.
  CalculatorGraphConfig graph_config =
      ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
  std::vector<Packet> output_packets;
  tool::AddVectorSink("tensor_out", &graph_config, &output_packets);
  CalculatorGraph graph(graph_config);
  MP_ASSERT_OK(graph.StartRun({}));

  // Push the tensor into the graph.
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "tensor_in", Adopt(input_vec.release()).At(Timestamp(0))));
  // Wait until the calculator is done processing.
  MP_ASSERT_OK(graph.WaitUntilIdle());
  ASSERT_EQ(1, output_packets.size());

  // Get and process results.
  const std::vector<Tensor>& result_vec =
      output_packets[0].Get<std::vector<Tensor>>();
  ASSERT_EQ(1, result_vec.size());

  const Tensor& result = result_vec[0];
  auto view = result.GetCpuReadView();
  auto result_buffer = view.buffer<float>();
  ASSERT_NE(result_buffer, nullptr);
  for (int i = 0; i < width * height * channels - 1; i++) {
    ASSERT_EQ(3, result_buffer[i]);
  }

  // Fully close the graph at the end; otherwise the calculator and tensors
  // are destroyed after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("tensor_in"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

// Tests a simple add model that adds an input tensor to itself.
TEST(InferenceCalculatorTest, SmokeTest) {
  std::string graph_proto = R"(
    input_stream: "tensor_in"
    node {
      calculator: "InferenceCalculator"
      input_stream: "TENSORS:tensor_in"
      output_stream: "TENSORS:tensor_out"
      options {
        [mediapipe.InferenceCalculatorOptions.ext] {
          model_path: "mediapipe/calculators/tensor/testdata/add.bin"
          $delegate
        }
      }
    }
  )";
  // Test CPU inference only.
  DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll(
      graph_proto, {{"$delegate", "delegate { tflite {} }"}}));
  DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll(
      graph_proto, {{"$delegate", "delegate { xnnpack {} }"}}));
  DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll(
      graph_proto,
      {{"$delegate", "delegate { xnnpack { num_threads: 10 } }"}}));
}

TEST(InferenceCalculatorTest, SmokeTest_ModelAsInputSidePacket) {
  std::string graph_proto = R"(
    input_stream: "tensor_in"

    node {
      calculator: "ConstantSidePacketCalculator"
      output_side_packet: "PACKET:model_path"
      options: {
        [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
          packet { string_value: "mediapipe/calculators/tensor/testdata/add.bin" }
        }
      }
    }

    node {
      calculator: "LocalFileContentsCalculator"
      input_side_packet: "FILE_PATH:model_path"
      output_side_packet: "CONTENTS:model_blob"
    }

    node {
      calculator: "TfLiteModelCalculator"
      input_side_packet: "MODEL_BLOB:model_blob"
      output_side_packet: "MODEL:model"
    }

    node {
      calculator: "InferenceCalculator"
      input_stream: "TENSORS:tensor_in"
      output_stream: "TENSORS:tensor_out"
      input_side_packet: "MODEL:model"
      options {
        [mediapipe.InferenceCalculatorOptions.ext] {
          delegate { tflite {} }
        }
      }
    }
  )";
  DoSmokeTest(graph_proto);
}

}  // namespace mediapipe
676
mediapipe/calculators/tensor/tensor_converter_calculator.cc
@ -0,0 +1,676 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <vector>

#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/resource_util.h"

#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#if MEDIAPIPE_METAL_ENABLED
#import <CoreVideo/CoreVideo.h>
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>

#import "mediapipe/gpu/MPPMetalHelper.h"
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include "mediapipe/gpu/gl_calculator_helper.h"
#if MEDIAPIPE_OPENGL_ES_VERSION < MEDIAPIPE_OPENGL_ES_31
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/shader_util.h"
#endif  // MEDIAPIPE_OPENGL_ES_VERSION < MEDIAPIPE_OPENGL_ES_31
#endif  // MEDIAPIPE_METAL_ENABLED
#endif  // !MEDIAPIPE_DISABLE_GPU

namespace {
constexpr int kWorkgroupSize = 8;  // Block size for GPU shader.
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size) {  // NOLINT
  return (size + group_size - 1) / group_size;
}
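// Illustrative values (not from the original source): NumGroups(640, 8) == 80
// while NumGroups(641, 8) == 81, i.e. ceiling division, so any partial block
// still gets its own workgroup.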

typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
    RowMajorMatrixXf;
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
    ColMajorMatrixXf;

constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kMatrixTag[] = "MATRIX";
}  // namespace

namespace mediapipe {

// Calculator for normalizing and converting an ImageFrame, GpuBuffer or Matrix
// into a Tensor.
//
// This calculator is designed to be used with the InferenceCalculator,
// as a pre-processing step for calculator inputs.
//
// IMAGE and IMAGE_GPU inputs are normalized to [-1,1] (default) or [0,1],
// specified by options (unless outputting a quantized tensor).
//
// Input:
//  One of the following tags:
//  IMAGE - ImageFrame (assumed to be 8-bit or 32-bit data).
//  IMAGE_GPU - GpuBuffer (assumed to be RGBA or RGB GL texture).
//  MATRIX - Matrix.
//
// Output:
//  One of the following tags:
//  TENSORS - Vector of Tensors of type kFloat32. The resource type used:
//            - MTLBuffer if Metal API is available
//            - SSBO if Metal is unavailable and OpenGL ES 3.1 is available
//            - Texture2D if Metal and GLES 3.1 are not available and GLES 3.0
//              is.
//
// Example use:
// node {
//   calculator: "TensorConverterCalculator"
//   input_stream: "IMAGE:input_image"
//   output_stream: "TENSORS:image_tensor"
//   options: {
//     [mediapipe.TensorConverterCalculatorOptions.ext] {
//       zero_center: true
//     }
//   }
// }
//
// IMPORTANT Notes:
//  GPU tensors are currently only supported on mobile platforms.

class TensorConverterCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;
  ::mediapipe::Status Close(CalculatorContext* cc) override;

 private:
  ::mediapipe::Status InitGpu(CalculatorContext* cc);
  ::mediapipe::Status LoadOptions(CalculatorContext* cc);
  template <class T>
  ::mediapipe::Status NormalizeImage(const ImageFrame& image_frame,
                                     bool flip_vertically, float* tensor_ptr);
  ::mediapipe::Status CopyMatrixToTensor(const Matrix& matrix,
                                         float* tensor_ptr);
  ::mediapipe::Status ProcessCPU(CalculatorContext* cc);
  ::mediapipe::Status ProcessGPU(CalculatorContext* cc);

#if MEDIAPIPE_METAL_ENABLED
  MPPMetalHelper* gpu_helper_ = nullptr;
  id<MTLComputePipelineState> to_buffer_program_;
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  mediapipe::GlCalculatorHelper gpu_helper_;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
  GLuint to_buffer_program_;
#else
  enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
  GLuint to_tex2d_program_;
  GLuint framebuffer_;
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif  // MEDIAPIPE_METAL_ENABLED

  bool initialized_ = false;
  bool use_gpu_ = false;
  absl::optional<std::pair<float, float>> output_range_;
  bool flip_vertically_ = false;
  bool row_major_matrix_ = false;
  int max_num_channels_ = 3;
};
REGISTER_CALCULATOR(TensorConverterCalculator);

::mediapipe::Status TensorConverterCalculator::GetContract(
    CalculatorContract* cc) {
  // Confirm only one of the input streams is present.
  RET_CHECK(static_cast<int>(cc->Inputs().HasTag(kImageFrameTag)) +
                static_cast<int>(cc->Inputs().HasTag(kGpuBufferTag)) +
                static_cast<int>(cc->Inputs().HasTag(kMatrixTag)) ==
            1);

  if (cc->Inputs().HasTag(kImageFrameTag)) {
    cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
  }
  if (cc->Inputs().HasTag(kMatrixTag)) {
    cc->Inputs().Tag(kMatrixTag).Set<Matrix>();
  }

#if !MEDIAPIPE_DISABLE_GPU
  if (cc->Inputs().HasTag(kGpuBufferTag)) {
    cc->Inputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
#if MEDIAPIPE_METAL_ENABLED
    MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
    MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif  // MEDIAPIPE_METAL_ENABLED
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  RET_CHECK(cc->Outputs().HasTag(kTensorsTag));
  cc->Outputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();
  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  MP_RETURN_IF_ERROR(LoadOptions(cc));

#if !MEDIAPIPE_DISABLE_GPU
  if (cc->Inputs().HasTag(kGpuBufferTag)) {
    use_gpu_ = true;
#if MEDIAPIPE_METAL_ENABLED
    gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
    RET_CHECK(gpu_helper_);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
    MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif  // MEDIAPIPE_METAL_ENABLED
  }
#endif  // !MEDIAPIPE_DISABLE_GPU

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorConverterCalculator::Process(CalculatorContext* cc) {
  if (use_gpu_) {
    if (cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
      return ::mediapipe::OkStatus();
    }
    // Convert to GPU tensors type.
    MP_RETURN_IF_ERROR(ProcessGPU(cc));
  } else {
    // Convert to CPU tensors or Matrix type.
    MP_RETURN_IF_ERROR(ProcessCPU(cc));
  }
  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorConverterCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  if (use_gpu_) {
#if MEDIAPIPE_METAL_ENABLED
    to_buffer_program_ = nil;
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
    gpu_helper_.RunInGlContext([this] {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
      glDeleteProgram(to_buffer_program_);
#else
      glDeleteFramebuffers(1, &framebuffer_);
      glDeleteProgram(to_tex2d_program_);
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
    });
#endif  // MEDIAPIPE_METAL_ENABLED
  }
#endif  // !MEDIAPIPE_DISABLE_GPU
  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorConverterCalculator::ProcessCPU(
    CalculatorContext* cc) {
  auto output_tensors = absl::make_unique<std::vector<Tensor>>();
  if (cc->Inputs().HasTag(kImageFrameTag)) {
    if (cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
      return ::mediapipe::OkStatus();
    }
    const auto& image_frame =
        cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
    const int height = image_frame.Height();
    const int width = image_frame.Width();
    const int channels = image_frame.NumberOfChannels();
    const int channels_preserved = std::min(channels, max_num_channels_);
    const mediapipe::ImageFormat::Format format = image_frame.Format();

    if (!(format == mediapipe::ImageFormat::SRGBA ||
          format == mediapipe::ImageFormat::SRGB ||
          format == mediapipe::ImageFormat::GRAY8 ||
          format == mediapipe::ImageFormat::VEC32F1))
      RET_CHECK_FAIL() << "Unsupported CPU input format.";

    output_tensors->emplace_back(
        Tensor::ElementType::kFloat32,
        Tensor::Shape{1, height, width, channels_preserved});
    auto cpu_view = output_tensors->back().GetCpuWriteView();

    // Copy image data into tensor.
    if (image_frame.ByteDepth() == 1) {
      MP_RETURN_IF_ERROR(NormalizeImage<uint8>(image_frame, flip_vertically_,
                                               cpu_view.buffer<float>()));
    } else if (image_frame.ByteDepth() == 4) {
      MP_RETURN_IF_ERROR(NormalizeImage<float>(image_frame, flip_vertically_,
                                               cpu_view.buffer<float>()));
    } else {
      return ::mediapipe::InternalError(
          "Only byte-based (8 bit) and float (32 bit) images supported.");
    }
  } else if (cc->Inputs().HasTag(kMatrixTag)) {
    if (cc->Inputs().Tag(kMatrixTag).IsEmpty()) {
      return ::mediapipe::OkStatus();
    }
    const auto& matrix = cc->Inputs().Tag(kMatrixTag).Get<Matrix>();
    const int height = matrix.rows();
    const int width = matrix.cols();
    const int channels = 1;
    output_tensors->emplace_back(Tensor::ElementType::kFloat32,
                                 Tensor::Shape{1, height, width, channels});
    MP_RETURN_IF_ERROR(CopyMatrixToTensor(
        matrix, output_tensors->back().GetCpuWriteView().buffer<float>()));
  } else {
    return ::mediapipe::OkStatus();
  }
  cc->Outputs()
      .Tag(kTensorsTag)
      .Add(output_tensors.release(), cc->InputTimestamp());

  return ::mediapipe::OkStatus();
}
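// Note: both CPU branches above emit a single NHWC float32 tensor of shape
// {1, height, width, channels}; for a Matrix input the "image" is simply the
// matrix itself with a single channel.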

::mediapipe::Status TensorConverterCalculator::ProcessGPU(
    CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  if (!initialized_) {
    MP_RETURN_IF_ERROR(InitGpu(cc));
    initialized_ = true;
  }
  const auto& input =
      cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
  int width = input.width();
  int height = input.height();
  int channels = max_num_channels_;
  auto output_tensors = absl::make_unique<std::vector<Tensor>>();
  output_tensors->emplace_back(Tensor::ElementType::kFloat32,
                               Tensor::Shape{1, height, width, channels});
#if MEDIAPIPE_METAL_ENABLED
  id<MTLDevice> device = gpu_helper_.mtlDevice;
  id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
  command_buffer.label = @"TensorConverterCalculatorConvert";
  id<MTLComputeCommandEncoder> compute_encoder =
      [command_buffer computeCommandEncoder];
  [compute_encoder setComputePipelineState:to_buffer_program_];
  id<MTLTexture> src_texture = [gpu_helper_ metalTextureWithGpuBuffer:input];
  [compute_encoder setTexture:src_texture atIndex:0];
  auto output_view =
      output_tensors->at(0).GetMtlBufferWriteView(command_buffer);
  [compute_encoder setBuffer:output_view.buffer() offset:0 atIndex:1];
  MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, kWorkgroupSize, 1);
  MTLSize threadgroups =
      MTLSizeMake(NumGroups(input.width(), kWorkgroupSize),
                  NumGroups(input.height(), kWorkgroupSize), 1);
  [compute_encoder dispatchThreadgroups:threadgroups
                  threadsPerThreadgroup:threads_per_group];
  [compute_encoder endEncoding];
  [command_buffer commit];
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
      [this, &output_tensors, &input]() -> ::mediapipe::Status {
        auto src = gpu_helper_.CreateSourceTexture(input);
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
        // Convert GL texture into SSBO.
        glActiveTexture(GL_TEXTURE0);
        glBindTexture(GL_TEXTURE_2D, src.name());
        auto output_view = output_tensors->back().GetOpenGlBufferWriteView();
        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, output_view.name());
        glUseProgram(to_buffer_program_);
        glDispatchCompute(NumGroups(input.width(), kWorkgroupSize),
                          NumGroups(input.height(), kWorkgroupSize), 1);
        glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
        glBindTexture(GL_TEXTURE_2D, 0);
#else
        // Texture2D -> Texture2D with OpenGL ES 3.0.
        glUseProgram(to_tex2d_program_);
        glDisable(GL_DEPTH_TEST);
        glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
        glViewport(0, 0, src.width(), src.height());
        glActiveTexture(GL_TEXTURE0);
        auto output_view =
            output_tensors->back().GetOpenGlTexture2dWriteView();
        glBindTexture(GL_TEXTURE_2D, output_view.name());
        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                               GL_TEXTURE_2D, output_view.name(), 0);
        glActiveTexture(GL_TEXTURE1);
        glBindTexture(src.target(), src.name());
        glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0,
                              mediapipe::kBasicSquareVertices);
        glEnableVertexAttribArray(ATTRIB_VERTEX);
        glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
                              mediapipe::kBasicTextureVertices);
        glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);

        // draw
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

        // cleanup
        glActiveTexture(GL_TEXTURE0);
        glBindTexture(GL_TEXTURE_2D, 0);
        glActiveTexture(GL_TEXTURE1);
        glBindTexture(GL_TEXTURE_2D, 0);
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
        src.Release();
        return ::mediapipe::OkStatus();
      }));
#endif  // MEDIAPIPE_METAL_ENABLED
  cc->Outputs()
      .Tag(kTensorsTag)
      .Add(output_tensors.release(), cc->InputTimestamp());
#else
  RET_CHECK_FAIL() << "GPU processing is not enabled.";
#endif  // !MEDIAPIPE_DISABLE_GPU

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
  // Get input image sizes.
  const auto& input =
      cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
  mediapipe::ImageFormat::Format format =
      mediapipe::ImageFormatForGpuBufferFormat(input.format());
  const bool include_alpha = (max_num_channels_ == 4);
  const bool single_channel = (max_num_channels_ == 1);
  if (!(format == mediapipe::ImageFormat::GRAY8 ||
        format == mediapipe::ImageFormat::SRGB ||
        format == mediapipe::ImageFormat::SRGBA))
    RET_CHECK_FAIL() << "Unsupported GPU input format.";
  if (include_alpha && (format != mediapipe::ImageFormat::SRGBA))
    RET_CHECK_FAIL() << "Num input channels is less than desired output.";

#if MEDIAPIPE_METAL_ENABLED
  id<MTLDevice> device = gpu_helper_.mtlDevice;
  // Shader to convert GL Texture to Metal Buffer,
  // with normalization to either: [0,1] or [-1,1].
  const std::string shader_source = absl::Substitute(
      R"(
  #include <metal_stdlib>

  using namespace metal;

  kernel void convertKernel(
      texture2d<half, access::sample> in_tex  [[ texture(0) ]],
      device float*                   out_buf [[ buffer(1) ]],
      uint2                           gid     [[ thread_position_in_grid ]]) {
    if (gid.x >= in_tex.get_width() || gid.y >= in_tex.get_height()) return;
    constexpr sampler texture_sampler(coord::pixel, address::clamp_to_edge);
    const float2 coord = float2(gid.x, gid.y);
    half4 pixel = in_tex.sample(texture_sampler, coord);
    $0   // normalize [-1,1]
    const int linear_index = $1 * ($2 * in_tex.get_width() + gid.x);
    out_buf[linear_index + 0] = pixel.x;
    $3   // g & b channels
    $4   // alpha channel
  }
      )",
      /*$0=*/
      output_range_.has_value()
          ? absl::Substitute("pixel = pixel * half($0) + half($1);",
                             (output_range_->second - output_range_->first),
                             output_range_->first)
          : "",
      /*$1=*/max_num_channels_,
      /*$2=*/flip_vertically_ ? "(in_tex.get_height() - 1 - gid.y)" : "gid.y",
      /*$3=*/
      single_channel ? "" : R"(out_buf[linear_index + 1] = pixel.y;
                               out_buf[linear_index + 2] = pixel.z;)",
      /*$4=*/include_alpha ? "out_buf[linear_index + 3] = pixel.w;" : "");

  NSString* library_source =
      [NSString stringWithUTF8String:shader_source.c_str()];
  NSError* error = nil;
  id<MTLLibrary> library =
      [device newLibraryWithSource:library_source options:nullptr error:&error];
  RET_CHECK(library != nil) << "Couldn't create shader library "
                            << [[error localizedDescription] UTF8String];
  id<MTLFunction> kernel_func = nil;
  kernel_func = [library newFunctionWithName:@"convertKernel"];
  RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";
  to_buffer_program_ =
      [device newComputePipelineStateWithFunction:kernel_func error:&error];
  RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
      [[error localizedDescription] UTF8String];
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &include_alpha,
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
                                                 &input,
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
                                                 &single_channel]()
                                                    -> ::mediapipe::Status {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
    // Shader to convert GL Texture to Shader Storage Buffer Object (SSBO),
    // with normalization to either: [0,1] or [-1,1].
    const std::string shader_source = absl::Substitute(
        R"( #version 310 es
          layout(local_size_x = $0, local_size_y = $0) in;
          layout(binding = 0) uniform sampler2D input_texture;
          layout(std430, binding = 1) buffer Output {float elements[];} output_data;
          ivec2 width_height = ivec2($1, $2);
          void main() {
            ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
            if (gid.x >= width_height.x || gid.y >= width_height.y) return;
            vec4 pixel = texelFetch(input_texture, gid, 0);
            $3  // normalize [-1,1]
            int linear_index = $7 * ($4 * width_height.x + gid.x);
            output_data.elements[linear_index + 0] = pixel.x;  // r channel
            $5  // g & b channels
            $6  // alpha channel
          })",
        /*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(),
        /*$3=*/
        output_range_.has_value()
            ? absl::Substitute("pixel = pixel * float($0) + float($1);",
                               (output_range_->second - output_range_->first),
                               output_range_->first)
            : "",
        /*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y",
        /*$5=*/
        single_channel ? ""
                       : R"(output_data.elements[linear_index + 1] = pixel.y;
                            output_data.elements[linear_index + 2] = pixel.z;)",
        /*$6=*/
        include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;"
                      : "",
        /*$7=*/max_num_channels_);
    GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
    const GLchar* sources[] = {shader_source.c_str()};
    glShaderSource(shader, 1, sources, NULL);
    glCompileShader(shader);
    GLint compiled = GL_FALSE;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
    RET_CHECK(compiled == GL_TRUE);
    to_buffer_program_ = glCreateProgram();
    glAttachShader(to_buffer_program_, shader);
    glDeleteShader(shader);
    glLinkProgram(to_buffer_program_);
#else
    // OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion.
    const std::string shader_source = absl::Substitute(
        R"(
        #if __VERSION__ < 130
          #define in varying
        #endif  // __VERSION__ < 130

        #ifdef GL_ES
          #define fragColor gl_FragColor
          precision highp float;
        #else
          #define lowp
          #define mediump
          #define highp
          #define texture2D texture
          out $0 fragColor;
        #endif  // defined(GL_ES)

        in vec2 sample_coordinate;
        uniform sampler2D frame;

        void main() {
          $1  // flip
          vec4 pixel = texture2D(frame, sample_coordinate);
          $2  // normalize [-1,1]
          fragColor.r = pixel.r;  // r channel
          $3  // g & b channels
          $4  // alpha channel
        })",
        /*$0=*/single_channel ? "vec1" : "vec4",
        /*$1=*/
        flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;"
                         : "",
        /*$2=*/output_range_.has_value()
            ? absl::Substitute("pixel = pixel * float($0) + float($1);",
                               (output_range_->second - output_range_->first),
                               output_range_->first)
            : "",
        /*$3=*/single_channel ? "" : R"(fragColor.g = pixel.g;
                                        fragColor.b = pixel.b;)",
        /*$4=*/
        include_alpha ? "fragColor.a = pixel.a;"
                      : (single_channel ? "" : "fragColor.a = 1.0;"));

    const GLint attr_location[NUM_ATTRIBUTES] = {
        ATTRIB_VERTEX,
        ATTRIB_TEXTURE_POSITION,
    };
    const GLchar* attr_name[NUM_ATTRIBUTES] = {
        "position",
        "texture_coordinate",
    };
    // shader program and params
    mediapipe::GlhCreateProgram(
        mediapipe::kBasicVertexShader, shader_source.c_str(), NUM_ATTRIBUTES,
        &attr_name[0], attr_location, &to_tex2d_program_);
    RET_CHECK(to_tex2d_program_) << "Problem initializing the program.";
    glUseProgram(to_tex2d_program_);
    glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1);
    glGenFramebuffers(1, &framebuffer_);

#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
    return ::mediapipe::OkStatus();
  }));
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif  // !MEDIAPIPE_DISABLE_GPU
  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorConverterCalculator::LoadOptions(
    CalculatorContext* cc) {
  // Get calculator options specified in the graph.
  const auto& options =
      cc->Options<::mediapipe::TensorConverterCalculatorOptions>();

  // If zero_center, set the output float range to [-1, 1] as specified in
  // the calculator proto.
  if (options.zero_center()) {
    output_range_.emplace(std::pair<float, float>(-1.0, 1.0));
  }

  // Custom output_tensor_float_range values.
  // If the float range is specified in pb text, use the specified values
  // instead.
  if (options.has_output_tensor_float_range()) {
    output_range_.emplace(options.output_tensor_float_range().min(),
                          options.output_tensor_float_range().max());
    CHECK_GT(output_range_->second, output_range_->first);
  }

  // Custom div and sub values.
  if (options.use_custom_normalization()) {
    output_range_.emplace(std::pair<float, float>(
        -options.custom_sub(),
        -options.custom_sub() + 255.0 / options.custom_div()));
  }
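  // With this range, NormalizeImage's scale becomes (max - min) / 255 =
  // 1 / custom_div and its bias becomes min = -custom_sub, so each pixel is
  // mapped to input / custom_div - custom_sub, matching the proto comment.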

  // Get y-flip mode.
  flip_vertically_ = options.flip_vertically();

  // Get row_major_matrix mode.
  row_major_matrix_ = options.row_major_matrix();

  // Get desired way to handle input channels.
  max_num_channels_ = options.max_num_channels();
  CHECK_GE(max_num_channels_, 1);
  CHECK_LE(max_num_channels_, 4);
  CHECK_NE(max_num_channels_, 2);
  return ::mediapipe::OkStatus();
}

template <class T>
::mediapipe::Status TensorConverterCalculator::NormalizeImage(
    const ImageFrame& image_frame, bool flip_vertically, float* tensor_ptr) {
  const int height = image_frame.Height();
  const int width = image_frame.Width();
  const int channels = image_frame.NumberOfChannels();
  const int channels_preserved = std::min(channels, max_num_channels_);
  const int channels_ignored = channels - channels_preserved;

  if (output_range_.has_value()) {
    // If the output float range is set and we are not using custom
    // normalization, normalize the pixel values from [0, 255] to the
    // specified output range.
    RET_CHECK_NE(output_range_->first, output_range_->second);
    const float scale =
        (output_range_->second - output_range_->first) / 255.0f;
    const float bias = output_range_->first;

    for (int i = 0; i < height; ++i) {
      const T* image_ptr = reinterpret_cast<const T*>(
          image_frame.PixelData() +
          (flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
      for (int j = 0; j < width; ++j) {
        for (int c = 0; c < channels_preserved; ++c) {
          *tensor_ptr++ = *image_ptr++ * scale + bias;
        }
        image_ptr += channels_ignored;
      }
    }
  } else {
    // [0,1], scale only (bias == 0)
    // Verified that there are no precision issues with the 1.0f / 255.0f
    // expression.
    const float scale = 1.0f / 255.0f;
    for (int i = 0; i < height; ++i) {
      const T* image_ptr = reinterpret_cast<const T*>(
          image_frame.PixelData() +
          (flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
      for (int j = 0; j < width; ++j) {
        for (int c = 0; c < channels_preserved; ++c) {
          *tensor_ptr++ = *image_ptr++ * scale;
        }
        image_ptr += channels_ignored;
      }
    }
  }

  return ::mediapipe::OkStatus();
}
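// For example, with the default zero_center range [-1, 1] the mapping above
// is value * 2/255 - 1, so a uint8 pixel of 0 becomes -1.0 and 255 becomes
// 1.0.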

::mediapipe::Status TensorConverterCalculator::CopyMatrixToTensor(
    const Matrix& matrix, float* tensor_ptr) {
  if (row_major_matrix_) {
    auto matrix_map =
        Eigen::Map<RowMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
    matrix_map = matrix;
  } else {
    auto matrix_map =
        Eigen::Map<ColMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
    matrix_map = matrix;
  }

  return ::mediapipe::OkStatus();
}
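// Design note: mapping tensor_ptr with the requested Eigen storage order lets
// the single assignment lay the matrix out in that order directly in the
// tensor buffer, with no intermediate copy.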

}  // namespace mediapipe

69
mediapipe/calculators/tensor/tensor_converter_calculator.proto
@ -0,0 +1,69 @@
syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

// Full Example:
//
// node {
//   calculator: "TensorConverterCalculator"
//   input_stream: "IMAGE_IN:input_image"
//   output_stream: "TENSOR_OUT:image_tensor"
//   options {
//     [mediapipe.TensorConverterCalculatorOptions.ext] {
//       zero_center: true
//     }
//   }
// }
//
message TensorConverterCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional TensorConverterCalculatorOptions ext = 335742637;
  }

  // Choose normalization mode for output (not applied for Matrix inputs).
  // true = [-1,1]
  // false = [0,1]
  // Ignored if using quantization.
  optional bool zero_center = 1 [default = true];

  // Custom settings to override the internal scaling factors `div` and `sub`.
  // Both values must be set to non-negative values. Will only take effect on
  // CPU AND when |use_custom_normalization| is set to true. When these custom
  // values take effect, the |zero_center| setting above will be overridden,
  // and the normalized value will be calculated as:
  // normalized_value = input / custom_div - custom_sub.
  optional bool use_custom_normalization = 6 [default = false];
  optional float custom_div = 7 [default = -1.0];
  optional float custom_sub = 8 [default = -1.0];

  // Whether the input image should be flipped vertically (along the
  // y-direction). This is useful, for example, when the input image is defined
  // with a coordinate system where the origin is at the bottom-left corner
  // (e.g., in OpenGL) whereas the ML model expects an image with a top-left
  // origin.
  optional bool flip_vertically = 2 [default = false];

  // Controls how many channels of the input image get passed through to the
  // tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
  optional int32 max_num_channels = 3 [default = 3];

  // The calculator expects Matrix inputs to be in column-major order. Set
  // row_major_matrix to true if the inputs are in row-major order.
  optional bool row_major_matrix = 4 [default = false];

  // Quantization option (CPU only).
  // When true, output kUint8 tensor instead of kFloat32.
  optional bool use_quantized_tensors = 5 [default = false];

  // Normalization option.
  // Setting output_tensor_float_range results in the values being normalized
  // to the range [output_tensor_float_range.min, output_tensor_float_range.max].
  optional TensorFloatRange output_tensor_float_range = 9;

  message TensorFloatRange {
    optional float min = 1;
    optional float max = 2;
  }
}
323
mediapipe/calculators/tensor/tensor_converter_calculator_test.cc
@ -0,0 +1,323 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"  // NOLINT
#include "mediapipe/framework/tool/validate_type.h"

namespace mediapipe {
namespace {

constexpr char kTransposeOptionsString[] =
    "[mediapipe.TensorConverterCalculatorOptions.ext]: {"
    "row_major_matrix: True}";

}  // namespace

using RandomEngine = std::mt19937_64;
using testing::Eq;
const uint32 kSeed = 1234;
const int kNumSizes = 8;
const int sizes[kNumSizes][2] = {{1, 1}, {12, 1}, {1, 9},   {2, 2},
                                 {5, 3}, {7, 13}, {16, 32}, {101, 2}};

class TensorConverterCalculatorTest : public ::testing::Test {
 protected:
  // Adds a packet with a matrix filled with random values in [0,1].
  void AddRandomMatrix(int num_rows, int num_columns, uint32 seed,
                       bool row_major_matrix = false) {
    RandomEngine random(kSeed);
    std::uniform_real_distribution<> uniform_dist(0, 1.0);
    auto matrix = ::absl::make_unique<Matrix>();
    matrix->resize(num_rows, num_columns);
    if (row_major_matrix) {
      for (int y = 0; y < num_rows; ++y) {
        for (int x = 0; x < num_columns; ++x) {
          float value = uniform_dist(random);
          (*matrix)(y, x) = value;
        }
      }
    } else {
      for (int x = 0; x < num_columns; ++x) {
        for (int y = 0; y < num_rows; ++y) {
          float value = uniform_dist(random);
          (*matrix)(y, x) = value;
        }
      }
    }
    MP_ASSERT_OK(graph_->AddPacketToInputStream(
        "matrix", Adopt(matrix.release()).At(Timestamp(0))));
  }
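  // Note: the matrix is filled in the same order it will later be read back
  // from the flat tensor buffer, so the tests can regenerate the RNG sequence
  // and compare element by element. (The seed parameter is currently shadowed
  // by kSeed above.)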

  std::unique_ptr<CalculatorGraph> graph_;
};

TEST_F(TensorConverterCalculatorTest, RandomMatrixColMajor) {
  for (int size_index = 0; size_index < kNumSizes; ++size_index) {
    const int num_rows = sizes[size_index][0];
    const int num_columns = sizes[size_index][1];

    // Run the calculator and verify that one output is generated.
    CalculatorGraphConfig graph_config =
        ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
          input_stream: "matrix"
          node {
            calculator: "TensorConverterCalculator"
            input_stream: "MATRIX:matrix"
            output_stream: "TENSORS:tensor"
            options {
              [mediapipe.TensorConverterCalculatorOptions.ext] {
                row_major_matrix: false
              }
            }
          }
        )");
    std::vector<Packet> output_packets;
    tool::AddVectorSink("tensor", &graph_config, &output_packets);

    // Run the graph.
    graph_ = absl::make_unique<CalculatorGraph>();
    MP_ASSERT_OK(graph_->Initialize(graph_config));
    MP_ASSERT_OK(graph_->StartRun({}));

    // Push the tensor into the graph.
    AddRandomMatrix(num_rows, num_columns, kSeed, /*row_major_matrix=*/false);

    // Wait until the calculator is done processing.
    MP_ASSERT_OK(graph_->WaitUntilIdle());
    EXPECT_EQ(1, output_packets.size());

    // Get and process results.
    const std::vector<Tensor>& tensor_vec =
        output_packets[0].Get<std::vector<Tensor>>();
    EXPECT_EQ(1, tensor_vec.size());

    const Tensor* tensor = &tensor_vec[0];
    EXPECT_EQ(Tensor::ElementType::kFloat32, tensor->element_type());

    // Verify that the data is correct.
    RandomEngine random(kSeed);
    std::uniform_real_distribution<> uniform_dist(0, 1.0);
    auto view = tensor->GetCpuReadView();
    auto tensor_buffer = view.buffer<float>();
    for (int i = 0; i < num_rows * num_columns; ++i) {
      const float expected = uniform_dist(random);
      EXPECT_EQ(expected, tensor_buffer[i]) << "at i = " << i;
    }

    // Fully close graph at end, otherwise calculator+tensors are destroyed
    // after calling WaitUntilDone().
    MP_ASSERT_OK(graph_->CloseInputStream("matrix"));
    MP_ASSERT_OK(graph_->WaitUntilDone());

    graph_.reset();
  }
}

TEST_F(TensorConverterCalculatorTest, RandomMatrixRowMajor) {
  for (int size_index = 0; size_index < kNumSizes; ++size_index) {
    const int num_rows = sizes[size_index][0];
    const int num_columns = sizes[size_index][1];

    // Run the calculator and verify that one output is generated.
    CalculatorGraphConfig graph_config =
        ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
          input_stream: "matrix"
          node {
            calculator: "TensorConverterCalculator"
            input_stream: "MATRIX:matrix"
            output_stream: "TENSORS:tensor"
            options {
              [mediapipe.TensorConverterCalculatorOptions.ext] {
                row_major_matrix: true
              }
            }
          }
        )");
    std::vector<Packet> output_packets;
    tool::AddVectorSink("tensor", &graph_config, &output_packets);

    // Run the graph.
    graph_ = absl::make_unique<CalculatorGraph>();
    MP_ASSERT_OK(graph_->Initialize(graph_config));
    MP_ASSERT_OK(graph_->StartRun({}));

    // Push the tensor into the graph.
    AddRandomMatrix(num_rows, num_columns, kSeed, /*row_major_matrix=*/true);

    // Wait until the calculator is done processing.
    MP_ASSERT_OK(graph_->WaitUntilIdle());
    EXPECT_EQ(1, output_packets.size());

    // Get and process results.
    const std::vector<Tensor>& tensor_vec =
        output_packets[0].Get<std::vector<Tensor>>();
    EXPECT_EQ(1, tensor_vec.size());

    const Tensor* tensor = &tensor_vec[0];
    EXPECT_EQ(Tensor::ElementType::kFloat32, tensor->element_type());

    // Verify that the data is correct.
    RandomEngine random(kSeed);
    std::uniform_real_distribution<> uniform_dist(0, 1.0);
    auto view = tensor->GetCpuReadView();
    auto tensor_buffer = view.buffer<float>();
    for (int i = 0; i < num_rows * num_columns; ++i) {
      const float expected = uniform_dist(random);
      EXPECT_EQ(expected, tensor_buffer[i]) << "at i = " << i;
    }

    // Fully close graph at end, otherwise calculator+tensors are destroyed
    // after calling WaitUntilDone().
    MP_ASSERT_OK(graph_->CloseInputStream("matrix"));
    MP_ASSERT_OK(graph_->WaitUntilDone());

    graph_.reset();
  }
}

TEST_F(TensorConverterCalculatorTest, CustomDivAndSub) {
  CalculatorGraph graph;
  // Run the calculator and verify that one output is generated.
  CalculatorGraphConfig graph_config =
      ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
        input_stream: "input_image"
        node {
          calculator: "TensorConverterCalculator"
          input_stream: "IMAGE:input_image"
          output_stream: "TENSORS:tensor"
          options {
            [mediapipe.TensorConverterCalculatorOptions.ext] {
              row_major_matrix: true
              use_custom_normalization: true
              custom_div: 2.0
              custom_sub: 33.0
            }
          }
        }
      )");
  std::vector<Packet> output_packets;
  tool::AddVectorSink("tensor", &graph_config, &output_packets);

  // Run the graph.
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));
  auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
  cv::Mat mat = ::mediapipe::formats::MatView(input_image.get());
  mat.at<uint8>(0, 0) = 200;
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "input_image", Adopt(input_image.release()).At(Timestamp(0))));

  // Wait until the calculator is done processing.
  MP_ASSERT_OK(graph.WaitUntilIdle());

  // Get and process results.
  const std::vector<Tensor>& tensor_vec =
      output_packets[0].Get<std::vector<Tensor>>();
  EXPECT_EQ(1, tensor_vec.size());

  const Tensor* tensor = &tensor_vec[0];
  EXPECT_EQ(Tensor::ElementType::kFloat32, tensor->element_type());
  auto view = tensor->GetCpuReadView();
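  // The expected value follows the custom normalization formula:
  // 200 / custom_div - custom_sub = 200 / 2.0 - 33.0 = 67.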
  EXPECT_FLOAT_EQ(67.0f, *view.buffer<float>());

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("input_image"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}

TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
  std::vector<std::pair<float, float>> range_values = {
      std::make_pair(0.0, 1.0), std::make_pair(-1.0, 1.0),
      std::make_pair(-0.5, 0.5)};
  for (std::pair<float, float> range : range_values) {
    CalculatorGraph graph;
    CalculatorGraphConfig graph_config =
        ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
            absl::Substitute(R"(
              input_stream: "input_image"
              node {
                calculator: "TensorConverterCalculator"
                input_stream: "IMAGE:input_image"
                output_stream: "TENSORS:tensor"
                options {
                  [mediapipe.TensorConverterCalculatorOptions.ext] {
                    output_tensor_float_range {
                      min: $0
                      max: $1
                    }
                  }
                }
              }
            )",
                             /*$0=*/range.first,
                             /*$1=*/range.second));
    std::vector<Packet> output_packets;
    tool::AddVectorSink("tensor", &graph_config, &output_packets);

    // Run the graph.
    MP_ASSERT_OK(graph.Initialize(graph_config));
    MP_ASSERT_OK(graph.StartRun({}));
    auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
    cv::Mat mat = ::mediapipe::formats::MatView(input_image.get());
    mat.at<uint8>(0, 0) = 200;
    MP_ASSERT_OK(graph.AddPacketToInputStream(
        "input_image", Adopt(input_image.release()).At(Timestamp(0))));

    // Wait until the calculator finishes processing.
    MP_ASSERT_OK(graph.WaitUntilIdle());
    EXPECT_THAT(output_packets.size(), Eq(1));

    // Get and process results.
    const std::vector<Tensor>& tensor_vec =
        output_packets[0].Get<std::vector<Tensor>>();
    EXPECT_THAT(tensor_vec.size(), Eq(1));

    const Tensor* tensor = &tensor_vec[0];

    // Calculate the expected normalized value:
    float normalized_value =
        range.first + (200 * (range.second - range.first)) / 255.0;
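    // e.g. for range (-1, 1): -1 + 200 * 2 / 255 ≈ 0.5686.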

    EXPECT_THAT(tensor->element_type(), Eq(Tensor::ElementType::kFloat32));
    auto view = tensor->GetCpuReadView();
    float dataf = *view.buffer<float>();
    EXPECT_THAT(
        normalized_value,
        testing::FloatNear(dataf, 2.0f * std::abs(dataf) *
                                      std::numeric_limits<float>::epsilon()));

    // Fully close graph at end, otherwise calculator+tensors are destroyed
    // after calling WaitUntilDone().
    MP_ASSERT_OK(graph.CloseInputStream("input_image"));
    MP_ASSERT_OK(graph.WaitUntilDone());
  }
}

}  // namespace mediapipe
197
mediapipe/calculators/tensor/tensors_to_classification_calculator.cc
@ -0,0 +1,197 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <unordered_map>
#include <vector>

#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "mediapipe/calculators/tensor/tensors_to_classification_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/resource_util.h"
#if defined(MEDIAPIPE_MOBILE)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/helpers.h"
#else
#include "mediapipe/framework/port/file_helpers.h"
#endif

namespace mediapipe {

// Convert result tensors from classification models into MediaPipe
// classifications.
//
// Input:
//  TENSORS - Vector of Tensors of type kFloat32 containing one
//            tensor, the size of which must be (1, * num_classes).
// Output:
//  CLASSIFICATIONS - Result MediaPipe ClassificationList. The score and index
//                    fields of each classification are set, while the label
//                    field is only set if label_map_path is provided.
//
// Usage example:
// node {
//   calculator: "TensorsToClassificationCalculator"
//   input_stream: "TENSORS:tensors"
//   output_stream: "CLASSIFICATIONS:classifications"
//   options: {
//     [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
//       num_classes: 1024
//       min_score_threshold: 0.1
//       label_map_path: "labelmap.txt"
//     }
//   }
// }
class TensorsToClassificationCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;
  ::mediapipe::Status Close(CalculatorContext* cc) override;

 private:
  ::mediapipe::TensorsToClassificationCalculatorOptions options_;
  int top_k_ = 0;
  std::unordered_map<int, std::string> label_map_;
  bool label_map_loaded_ = false;
};
REGISTER_CALCULATOR(TensorsToClassificationCalculator);

::mediapipe::Status TensorsToClassificationCalculator::GetContract(
    CalculatorContract* cc) {
  RET_CHECK(!cc->Inputs().GetTags().empty());
  RET_CHECK(!cc->Outputs().GetTags().empty());

  if (cc->Inputs().HasTag("TENSORS")) {
    cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>();
  }

  if (cc->Outputs().HasTag("CLASSIFICATIONS")) {
    cc->Outputs().Tag("CLASSIFICATIONS").Set<ClassificationList>();
  }

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorsToClassificationCalculator::Open(
    CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));

  options_ =
      cc->Options<::mediapipe::TensorsToClassificationCalculatorOptions>();

  top_k_ = options_.top_k();
  if (options_.has_label_map_path()) {
    std::string string_path;
    ASSIGN_OR_RETURN(string_path,
                     PathToResourceAsFile(options_.label_map_path()));
    std::string label_map_string;
    MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string));

    std::istringstream stream(label_map_string);
    std::string line;
    int i = 0;
    while (std::getline(stream, line)) {
      label_map_[i++] = line;
    }
    label_map_loaded_ = true;
  }

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorsToClassificationCalculator::Process(
    CalculatorContext* cc) {
  const auto& input_tensors =
      cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>();

  RET_CHECK_EQ(input_tensors.size(), 1);

  int num_classes = input_tensors[0].shape().num_elements();

  if (options_.binary_classification()) {
    RET_CHECK_EQ(num_classes, 1);
    // Number of classes for binary classification.
    num_classes = 2;
  }
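  // e.g. a single raw sigmoid score of 0.8 is expanded below into two
  // classes with scores 0.8 and 0.2.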
  if (label_map_loaded_) {
    RET_CHECK_EQ(num_classes, label_map_.size());
  }
  auto view = input_tensors[0].GetCpuReadView();
  auto raw_scores = view.buffer<float>();

  auto classification_list = absl::make_unique<ClassificationList>();
  if (options_.binary_classification()) {
    Classification* class_first = classification_list->add_classification();
    Classification* class_second = classification_list->add_classification();
    class_first->set_index(0);
    class_second->set_index(1);
    class_first->set_score(raw_scores[0]);
    class_second->set_score(1. - raw_scores[0]);

    if (label_map_loaded_) {
      class_first->set_label(label_map_[0]);
      class_second->set_label(label_map_[1]);
    }
  } else {
    for (int i = 0; i < num_classes; ++i) {
      if (options_.has_min_score_threshold() &&
          raw_scores[i] < options_.min_score_threshold()) {
        continue;
      }
      Classification* classification =
          classification_list->add_classification();
      classification->set_index(i);
      classification->set_score(raw_scores[i]);

      if (label_map_loaded_) {
        classification->set_label(label_map_[i]);
      }
    }
  }

  // Note that partial_sort will raise an error when top_k_ >
  // classification_list->classification_size().
  CHECK_GE(classification_list->classification_size(), top_k_);
  auto raw_classification_list = classification_list->mutable_classification();
  if (top_k_ > 0 && classification_list->classification_size() >= top_k_) {
    std::partial_sort(raw_classification_list->begin(),
                      raw_classification_list->begin() + top_k_,
                      raw_classification_list->end(),
                      [](const Classification a, const Classification b) {
                        return a.score() > b.score();
                      });

    // Resizes the underlying list to have only top_k_ classifications.
    raw_classification_list->DeleteSubrange(
        top_k_, raw_classification_list->size() - top_k_);
  }
  cc->Outputs()
      .Tag("CLASSIFICATIONS")
      .Add(classification_list.release(), cc->InputTimestamp());

  return ::mediapipe::OkStatus();
}

::mediapipe::Status TensorsToClassificationCalculator::Close(
    CalculatorContext* cc) {
  return ::mediapipe::OkStatus();
}

}  // namespace mediapipe
41
mediapipe/calculators/tensor/tensors_to_classification_calculator.proto
@ -0,0 +1,41 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// The option proto for the TensorsToClassificationCalculator.

syntax = "proto2";

package mediapipe;

import "mediapipe/framework/calculator.proto";

message TensorsToClassificationCalculatorOptions {
  extend .mediapipe.CalculatorOptions {
    optional TensorsToClassificationCalculatorOptions ext = 335742638;
  }

  // Score threshold for preserving the class.
  optional float min_score_threshold = 1;
  // Number of highest scoring labels to output. If top_k is not positive then
  // all labels are used.
  optional int32 top_k = 2;
  // Path to a label map file for getting the actual name of class ids.
  optional string label_map_path = 3;
  // Whether the input is a single float for binary classification.
  // When true, only a single float is expected in the input tensor and the
  // label map, if provided, is expected to have exactly two labels.
  // The single score (float) represents the probability of the first label,
  // and 1 - score is the probability of the second label.
  optional bool binary_classification = 4;
}
174
mediapipe/calculators/tensor/tensors_to_classification_calculator_test.cc
@ -0,0 +1,174 @@
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "mediapipe/calculators/tensor/tensors_to_classification_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_runner.h"
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/framework/port/gtest.h"
|
||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
using ::mediapipe::ParseTextProtoOrDie;
|
||||
using Node = ::mediapipe::CalculatorGraphConfig::Node;
|
||||
|
||||
class TensorsToClassificationCalculatorTest : public ::testing::Test {
|
||||
protected:
|
||||
void BuildGraph(mediapipe::CalculatorRunner* runner,
|
||||
const std::vector<float>& scores) {
|
||||
auto tensors = absl::make_unique<std::vector<Tensor>>();
|
||||
tensors->emplace_back(
|
||||
Tensor::ElementType::kFloat32,
|
||||
Tensor::Shape{1, 1, static_cast<int>(scores.size()), 1});
|
||||
auto view = tensors->back().GetCpuWriteView();
|
||||
float* tensor_buffer = view.buffer<float>();
|
||||
ASSERT_NE(tensor_buffer, nullptr);
|
||||
for (int i = 0; i < scores.size(); ++i) {
|
||||
tensor_buffer[i] = scores[i];
|
||||
}
|
||||
|
||||
int64 stream_timestamp = 0;
|
||||
auto& input_stream_packets =
|
||||
runner->MutableInputs()->Tag("TENSORS").packets;
|
||||
|
||||
input_stream_packets.push_back(
|
||||
mediapipe::Adopt(tensors.release())
|
||||
.At(mediapipe::Timestamp(stream_timestamp++)));
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TensorsToClassificationCalculatorTest, CorrectOutput) {
|
||||
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
|
||||
calculator: "TensorsToClassificationCalculator"
|
||||
input_stream: "TENSORS:tensors"
|
||||
output_stream: "CLASSIFICATIONS:classifications"
|
||||
options {
|
||||
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {}
|
||||
}
|
||||
)"));
|
||||
|
||||
BuildGraph(&runner, {0, 0.5, 1});
|
||||
MP_ASSERT_OK(runner.Run());
|
||||
|
||||
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
|
||||
|
||||
EXPECT_EQ(1, output_packets_.size());
|
||||
|
||||
const auto& classification_list =
|
||||
output_packets_[0].Get<ClassificationList>();
|
||||
EXPECT_EQ(3, classification_list.classification_size());
|
||||
|
||||
// Verify that the label_id and score fields are set correctly.
|
||||
for (int i = 0; i < classification_list.classification_size(); ++i) {
|
||||
EXPECT_EQ(i, classification_list.classification(i).index());
|
||||
EXPECT_EQ(i * 0.5, classification_list.classification(i).score());
|
||||
ASSERT_FALSE(classification_list.classification(i).has_label());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TensorsToClassificationCalculatorTest, CorrectOutputWithLabelMapPath) {
|
||||
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
|
||||
calculator: "TensorsToClassificationCalculator"
|
||||
input_stream: "TENSORS:tensors"
|
||||
output_stream: "CLASSIFICATIONS:classifications"
|
||||
options {
|
||||
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
|
||||
label_map_path: "mediapipe/calculators/tensor/testdata/labelmap.txt"
|
||||
}
|
||||
}
|
||||
)"));
|
||||
|
||||
BuildGraph(&runner, {0, 0.5, 1});
|
||||
MP_ASSERT_OK(runner.Run());
|
||||
|
||||
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
|
||||
|
||||
EXPECT_EQ(1, output_packets_.size());
|
||||
|
||||
const auto& classification_list =
|
||||
output_packets_[0].Get<ClassificationList>();
|
||||
EXPECT_EQ(3, classification_list.classification_size());
|
||||
|
||||
// Verify that the label field is set.
|
||||
for (int i = 0; i < classification_list.classification_size(); ++i) {
|
||||
EXPECT_EQ(i, classification_list.classification(i).index());
|
||||
EXPECT_EQ(i * 0.5, classification_list.classification(i).score());
|
||||
ASSERT_TRUE(classification_list.classification(i).has_label());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TensorsToClassificationCalculatorTest,
|
||||
CorrectOutputWithLabelMinScoreThreshold) {
|
||||
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
|
||||
calculator: "TensorsToClassificationCalculator"
|
||||
input_stream: "TENSORS:tensors"
|
||||
output_stream: "CLASSIFICATIONS:classifications"
|
||||
options {
|
||||
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
|
||||
min_score_threshold: 0.6
|
||||
}
|
||||
}
|
||||
)"));
|
||||
|
||||
BuildGraph(&runner, {0, 0.5, 1});
|
||||
MP_ASSERT_OK(runner.Run());
|
||||
|
||||
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
|
||||
|
||||
EXPECT_EQ(1, output_packets_.size());
|
||||
|
||||
const auto& classification_list =
|
||||
output_packets_[0].Get<ClassificationList>();
|
||||
|
||||
// Verify that the low score labels are filtered out.
|
||||
EXPECT_EQ(1, classification_list.classification_size());
|
||||
EXPECT_EQ(1, classification_list.classification(0).score());
|
||||
}
|
||||
|
||||
TEST_F(TensorsToClassificationCalculatorTest, CorrectOutputWithTopK) {
|
||||
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
|
||||
calculator: "TensorsToClassificationCalculator"
|
||||
input_stream: "TENSORS:tensors"
|
||||
output_stream: "CLASSIFICATIONS:classifications"
|
||||
options {
|
||||
[mediapipe.TensorsToClassificationCalculatorOptions.ext] { top_k: 2 }
|
||||
}
|
||||
)"));
|
||||
|
||||
BuildGraph(&runner, {0, 0.5, 1});
|
||||
MP_ASSERT_OK(runner.Run());
|
||||
|
||||
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
|
||||
|
||||
EXPECT_EQ(1, output_packets_.size());
|
||||
|
||||
const auto& classification_list =
|
||||
output_packets_[0].Get<ClassificationList>();
|
||||
|
||||
// Verify that only the top 2 labels are left.
|
||||
EXPECT_EQ(2, classification_list.classification_size());
|
||||
for (int i = 0; i < classification_list.classification_size(); ++i) {
|
||||
EXPECT_EQ((classification_list.classification_size() - i) * 0.5,
|
||||
classification_list.classification(i).score());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
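Taken together, the last two tests pin down the filtering semantics: min_score_threshold drops low-scoring classes, and top_k keeps only the highest-scoring survivors. A standalone sketch of that behavior (an illustration of the expected semantics, not the calculator's actual implementation):

#include <algorithm>
#include <vector>

struct ScoredClass {
  int index;
  float score;
};

// Drop classes below min_score_threshold, then keep the top_k best scores
// (top_k <= 0 keeps every survivor). With scores {0, 0.5, 1}, a threshold of
// 0.6 leaves one class and top_k = 2 leaves {1, 0.5}, matching the tests.
std::vector<ScoredClass> FilterClassifications(std::vector<ScoredClass> classes,
                                               float min_score_threshold,
                                               int top_k) {
  classes.erase(
      std::remove_if(classes.begin(), classes.end(),
                     [&](const ScoredClass& c) {
                       return c.score < min_score_threshold;
                     }),
      classes.end());
  std::sort(classes.begin(), classes.end(),
            [](const ScoredClass& a, const ScoredClass& b) {
              return a.score > b.score;
            });
  if (top_k > 0 && static_cast<int>(classes.size()) > top_k) {
    classes.resize(top_k);
  }
  return classes;
}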
1161
mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
Normal file
|
74
mediapipe/calculators/tensor/tensors_to_detections_calculator.proto
Normal file
|
@ -0,0 +1,74 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// The option proto for the TensorsToDetectionsCalculator.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
|
||||
message TensorsToDetectionsCalculatorOptions {
|
||||
extend .mediapipe.CalculatorOptions {
|
||||
optional TensorsToDetectionsCalculatorOptions ext = 335742639;
|
||||
}
|
||||
|
||||
// [Required] The number of output classes predicted by the detection model.
|
||||
optional int32 num_classes = 1;
|
||||
// [Required] The number of output boxes predicted by the detection model.
|
||||
optional int32 num_boxes = 2;
|
||||
// [Required] The number of output values per box predicted by the detection
|
||||
// model. The values contain bounding boxes, keypoints, etc.
|
||||
optional int32 num_coords = 3;
|
||||
|
||||
// The offset of keypoint coordinates in the location tensor.
|
||||
optional int32 keypoint_coord_offset = 9;
|
||||
// The number of predicted keypoints.
|
||||
optional int32 num_keypoints = 10 [default = 0];
|
||||
// The dimension of each keypoint, i.e. the number of values predicted for each
|
||||
// keypoint.
|
||||
optional int32 num_values_per_keypoint = 11 [default = 2];
|
||||
// The offset of box coordinates in the location tensor.
|
||||
optional int32 box_coord_offset = 12 [default = 0];
|
||||
|
||||
// Parameters for decoding SSD detection model.
|
||||
optional float x_scale = 4 [default = 0.0];
|
||||
optional float y_scale = 5 [default = 0.0];
|
||||
optional float w_scale = 6 [default = 0.0];
|
||||
optional float h_scale = 7 [default = 0.0];
|
||||
|
||||
optional bool apply_exponential_on_box_size = 13 [default = false];
|
||||
|
||||
// Whether to reverse the order of predicted x, y from output.
|
||||
// If false, the order is [y_center, x_center, h, w]; if true, the order is
|
||||
// [x_center, y_center, w, h].
|
||||
optional bool reverse_output_order = 14 [default = false];
|
||||
// The ids of classes that should be ignored during decoding the score for
|
||||
// each predicted box.
|
||||
repeated int32 ignore_classes = 8;
|
||||
|
||||
optional bool sigmoid_score = 15 [default = false];
|
||||
optional float score_clipping_thresh = 16;
|
||||
|
||||
// Whether the detection coordinates from the input tensors should be flipped
|
||||
// vertically (along the y-direction). This is useful, for example, when the
|
||||
// input tensors represent detections defined with a coordinate system where
|
||||
// the origin is at the top-left corner, whereas the desired detection
|
||||
// representation has a bottom-left origin (e.g., in OpenGL).
|
||||
optional bool flip_vertically = 18 [default = false];
|
||||
|
||||
// Score threshold for preserving decoded detections.
|
||||
optional float min_score_thresh = 19;
|
||||
}
|
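The scale and exponential options above parameterize a standard SSD-style box decode against a fixed set of anchors. Below is a hedged sketch of that decode; the Anchor struct and the exact formula are assumptions inferred from the option comments, not code taken from the calculator:

#include <cmath>

struct Anchor { float x_center, y_center, w, h; };
struct DecodedBox { float x_center, y_center, w, h; };

// Decodes one raw 4-value prediction against its anchor.
DecodedBox DecodeBox(const float* raw, const Anchor& a, float x_scale,
                     float y_scale, float w_scale, float h_scale,
                     bool reverse_output_order,
                     bool apply_exponential_on_box_size) {
  // reverse_output_order toggles between [y, x, h, w] and [x, y, w, h].
  const float rx = reverse_output_order ? raw[0] : raw[1];
  const float ry = reverse_output_order ? raw[1] : raw[0];
  const float rw = reverse_output_order ? raw[2] : raw[3];
  const float rh = reverse_output_order ? raw[3] : raw[2];

  DecodedBox box;
  box.x_center = rx / x_scale * a.w + a.x_center;
  box.y_center = ry / y_scale * a.h + a.y_center;
  if (apply_exponential_on_box_size) {
    box.w = std::exp(rw / w_scale) * a.w;  // size predicted in log space
    box.h = std::exp(rh / h_scale) * a.h;
  } else {
    box.w = rw / w_scale * a.w;
    box.h = rh / h_scale * a.h;
  }
  return box;
}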
97
mediapipe/calculators/tensor/tensors_to_floats_calculator.cc
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// A calculator for converting Tensors to a float or a float vector.
|
||||
//
|
||||
// Input:
|
||||
// TENSORS - Vector of Tensors of type kFloat32. Only the first
|
||||
// tensor will be used.
|
||||
// Output:
|
||||
// FLOAT(optional) - Converted single float number.
|
||||
// FLOATS(optional) - Converted float vector.
|
||||
//
|
||||
// Note: To output a FLOAT stream, the input tensor must have size 1, i.e.
|
||||
// contain only a single float value.
|
||||
//
|
||||
// Usage example:
|
||||
// node {
|
||||
// calculator: "TensorsToFloatsCalculator"
|
||||
// input_stream: "TENSORS:tensors"
|
||||
// output_stream: "FLOATS:floats"
|
||||
// }
|
||||
class TensorsToFloatsCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override;
|
||||
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
};
|
||||
REGISTER_CALCULATOR(TensorsToFloatsCalculator);
|
||||
|
||||
::mediapipe::Status TensorsToFloatsCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag("TENSORS"));
|
||||
RET_CHECK(cc->Outputs().HasTag("FLOATS") || cc->Outputs().HasTag("FLOAT"));
|
||||
|
||||
cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>();
|
||||
if (cc->Outputs().HasTag("FLOATS")) {
|
||||
cc->Outputs().Tag("FLOATS").Set<std::vector<float>>();
|
||||
}
|
||||
if (cc->Outputs().HasTag("FLOAT")) {
|
||||
cc->Outputs().Tag("FLOAT").Set<float>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TensorsToFloatsCalculator::Open(CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TensorsToFloatsCalculator::Process(CalculatorContext* cc) {
|
||||
RET_CHECK(!cc->Inputs().Tag("TENSORS").IsEmpty());
|
||||
|
||||
const auto& input_tensors =
|
||||
cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>();
|
||||
// TODO: Add option to specify which tensor to take from.
|
||||
auto view = input_tensors[0].GetCpuReadView();
|
||||
auto raw_floats = view.buffer<float>();
|
||||
int num_values = input_tensors[0].shape().num_elements();
|
||||
|
||||
if (cc->Outputs().HasTag("FLOAT")) {
|
||||
// TODO: Could add an index in the option to specify returning one
|
||||
// value of a float array.
|
||||
RET_CHECK_EQ(num_values, 1);
|
||||
cc->Outputs().Tag("FLOAT").AddPacket(
|
||||
MakePacket<float>(raw_floats[0]).At(cc->InputTimestamp()));
|
||||
}
|
||||
if (cc->Outputs().HasTag("FLOATS")) {
|
||||
auto output_floats = absl::make_unique<std::vector<float>>(
|
||||
raw_floats, raw_floats + num_values);
|
||||
cc->Outputs().Tag("FLOATS").Add(output_floats.release(),
|
||||
cc->InputTimestamp());
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
} // namespace mediapipe
|
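A quick way to exercise this calculator is with CalculatorRunner, following the same pattern as the classification test earlier in this diff. The test below is a sketch written for illustration (the test name is invented; it is not part of the repository):

#include <vector>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"

namespace mediapipe {

TEST(TensorsToFloatsCalculatorSketch, ConvertsTensorToFloatVector) {
  CalculatorRunner runner(
      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
        calculator: "TensorsToFloatsCalculator"
        input_stream: "TENSORS:tensors"
        output_stream: "FLOATS:floats"
      )"));

  // Build a two-element kFloat32 tensor.
  auto tensors = absl::make_unique<std::vector<Tensor>>();
  tensors->emplace_back(Tensor::ElementType::kFloat32, Tensor::Shape{1, 2});
  auto view = tensors->back().GetCpuWriteView();
  view.buffer<float>()[0] = 0.25f;
  view.buffer<float>()[1] = 0.75f;

  runner.MutableInputs()->Tag("TENSORS").packets.push_back(
      Adopt(tensors.release()).At(Timestamp(0)));
  MP_ASSERT_OK(runner.Run());

  const auto& packets = runner.Outputs().Tag("FLOATS").packets;
  ASSERT_EQ(packets.size(), 1);
  const auto& floats = packets[0].Get<std::vector<float>>();
  EXPECT_EQ(floats.size(), 2);
  EXPECT_FLOAT_EQ(floats[0], 0.25f);
}

}  // namespace mediapipe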
250
mediapipe/calculators/tensor/tensors_to_landmarks_calculator.cc
Normal file
|
@ -0,0 +1,250 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/calculators/tensor/tensors_to_landmarks_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/tensor.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// A calculator for converting Tensors from regression models into landmarks.
|
||||
// Note that if the landmarks in the tensor have more than 5 dimensions, only
|
||||
// the first 5 dimensions will be converted to [x, y, z, visibility, presence].
|
||||
//
|
||||
// Input:
|
||||
// TENSORS - Vector of Tensors of type kFloat32. Only the first tensor will be
|
||||
// used. The size of the values must be (num_dimension x num_landmarks).
|
||||
//
|
||||
// FLIP_HORIZONTALLY (optional): Whether to flip landmarks horizontally or
|
||||
// not. Overrides corresponding side packet and/or field in the calculator
|
||||
// options.
|
||||
//
|
||||
// FLIP_VERTICALLY (optional): Whether to flip landmarks vertically or not.
|
||||
// Overrides corresponding side packet and/or field in the calculator options.
|
||||
//
|
||||
// Input side packet:
|
||||
// FLIP_HORIZONTALLY (optional): Whether to flip landmarks horizontally or
|
||||
// not. Overrides the corresponding field in the calculator options.
|
||||
//
|
||||
// FLIP_VERTICALLY (optional): Whether to flip landmarks vertically or not.
|
||||
// Overrides the corresponding field in the calculator options.
|
||||
//
|
||||
// Output:
|
||||
// LANDMARKS(optional) - Result MediaPipe landmarks.
|
||||
// NORM_LANDMARKS(optional) - Result MediaPipe normalized landmarks.
|
||||
//
|
||||
// Notes:
|
||||
// To output normalized landmarks, the user must provide the input image size
|
||||
// to the model via the calculator options input_image_width and
|
||||
// input_image_height.
|
||||
// Usage example:
|
||||
// node {
|
||||
// calculator: "TensorsToLandmarksCalculator"
|
||||
// input_stream: "TENSORS:landmark_tensors"
|
||||
// output_stream: "LANDMARKS:landmarks"
|
||||
// output_stream: "NORM_LANDMARKS:landmarks"
|
||||
// options: {
|
||||
// [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
// num_landmarks: 21
|
||||
//
|
||||
// input_image_width: 256
|
||||
// input_image_height: 256
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class TensorsToLandmarksCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc);
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override;
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
::mediapipe::Status LoadOptions(CalculatorContext* cc);
|
||||
int num_landmarks_ = 0;
|
||||
bool flip_vertically_ = false;
|
||||
bool flip_horizontally_ = false;
|
||||
|
||||
::mediapipe::TensorsToLandmarksCalculatorOptions options_;
|
||||
};
|
||||
REGISTER_CALCULATOR(TensorsToLandmarksCalculator);
|
||||
|
||||
::mediapipe::Status TensorsToLandmarksCalculator::GetContract(
|
||||
CalculatorContract* cc) {
|
||||
RET_CHECK(!cc->Inputs().GetTags().empty());
|
||||
RET_CHECK(!cc->Outputs().GetTags().empty());
|
||||
|
||||
if (cc->Inputs().HasTag("TENSORS")) {
|
||||
cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>();
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag("FLIP_HORIZONTALLY")) {
|
||||
cc->Inputs().Tag("FLIP_HORIZONTALLY").Set<bool>();
|
||||
}
|
||||
|
||||
if (cc->Inputs().HasTag("FLIP_VERTICALLY")) {
|
||||
cc->Inputs().Tag("FLIP_VERTICALLY").Set<bool>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("FLIP_HORIZONTALLY")) {
|
||||
cc->InputSidePackets().Tag("FLIP_HORIZONTALLY").Set<bool>();
|
||||
}
|
||||
|
||||
if (cc->InputSidePackets().HasTag("FLIP_VERTICALLY")) {
|
||||
cc->InputSidePackets().Tag("FLIP_VERTICALLY").Set<bool>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("LANDMARKS")) {
|
||||
cc->Outputs().Tag("LANDMARKS").Set<LandmarkList>();
|
||||
}
|
||||
|
||||
if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
|
||||
cc->Outputs().Tag("NORM_LANDMARKS").Set<NormalizedLandmarkList>();
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TensorsToLandmarksCalculator::Open(CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
|
||||
MP_RETURN_IF_ERROR(LoadOptions(cc));
|
||||
|
||||
if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
|
||||
RET_CHECK(options_.has_input_image_height() &&
|
||||
options_.has_input_image_width())
|
||||
<< "Must provide input with/height for getting normalized landmarks.";
|
||||
}
|
||||
if (cc->Outputs().HasTag("LANDMARKS") &&
|
||||
(options_.flip_vertically() || options_.flip_horizontally() ||
|
||||
cc->InputSidePackets().HasTag("FLIP_HORIZONTALLY") ||
|
||||
cc->InputSidePackets().HasTag("FLIP_VERTICALLY"))) {
|
||||
RET_CHECK(options_.has_input_image_height() &&
|
||||
options_.has_input_image_width())
|
||||
<< "Must provide input with/height for using flip_vertically option "
|
||||
"when outputing landmarks in absolute coordinates.";
|
||||
}
|
||||
|
||||
flip_horizontally_ =
|
||||
cc->InputSidePackets().HasTag("FLIP_HORIZONTALLY")
|
||||
? cc->InputSidePackets().Tag("FLIP_HORIZONTALLY").Get<bool>()
|
||||
: options_.flip_horizontally();
|
||||
|
||||
flip_vertically_ =
|
||||
cc->InputSidePackets().HasTag("FLIP_VERTICALLY")
|
||||
? cc->InputSidePackets().Tag("FLIP_VERTICALLY").Get<bool>()
|
||||
: options_.flip_vertically();
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TensorsToLandmarksCalculator::Process(
|
||||
CalculatorContext* cc) {
|
||||
// Override values if specified so.
|
||||
if (cc->Inputs().HasTag("FLIP_HORIZONTALLY") &&
|
||||
!cc->Inputs().Tag("FLIP_HORIZONTALLY").IsEmpty()) {
|
||||
flip_horizontally_ = cc->Inputs().Tag("FLIP_HORIZONTALLY").Get<bool>();
|
||||
}
|
||||
if (cc->Inputs().HasTag("FLIP_VERTICALLY") &&
|
||||
!cc->Inputs().Tag("FLIP_VERTICALLY").IsEmpty()) {
|
||||
flip_vertically_ = cc->Inputs().Tag("FLIP_VERTICALLY").Get<bool>();
|
||||
}
|
||||
|
||||
if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
const auto& input_tensors =
|
||||
cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>();
|
||||
|
||||
int num_values = input_tensors[0].shape().num_elements();
|
||||
const int num_dimensions = num_values / num_landmarks_;
|
||||
CHECK_GT(num_dimensions, 0);
|
||||
|
||||
auto view = input_tensors[0].GetCpuReadView();
|
||||
auto raw_landmarks = view.buffer<float>();
|
||||
|
||||
LandmarkList output_landmarks;
|
||||
|
||||
for (int ld = 0; ld < num_landmarks_; ++ld) {
|
||||
const int offset = ld * num_dimensions;
|
||||
Landmark* landmark = output_landmarks.add_landmark();
|
||||
|
||||
if (flip_horizontally_) {
|
||||
landmark->set_x(options_.input_image_width() - raw_landmarks[offset]);
|
||||
} else {
|
||||
landmark->set_x(raw_landmarks[offset]);
|
||||
}
|
||||
if (num_dimensions > 1) {
|
||||
if (flip_vertically_) {
|
||||
landmark->set_y(options_.input_image_height() -
|
||||
raw_landmarks[offset + 1]);
|
||||
} else {
|
||||
landmark->set_y(raw_landmarks[offset + 1]);
|
||||
}
|
||||
}
|
||||
if (num_dimensions > 2) {
|
||||
landmark->set_z(raw_landmarks[offset + 2]);
|
||||
}
|
||||
if (num_dimensions > 3) {
|
||||
landmark->set_visibility(raw_landmarks[offset + 3]);
|
||||
}
|
||||
if (num_dimensions > 4) {
|
||||
landmark->set_presence(raw_landmarks[offset + 4]);
|
||||
}
|
||||
}
|
||||
|
||||
// Output normalized landmarks if required.
|
||||
if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
|
||||
NormalizedLandmarkList output_norm_landmarks;
|
||||
for (int i = 0; i < output_landmarks.landmark_size(); ++i) {
|
||||
const Landmark& landmark = output_landmarks.landmark(i);
|
||||
NormalizedLandmark* norm_landmark = output_norm_landmarks.add_landmark();
|
||||
norm_landmark->set_x(landmark.x() / options_.input_image_width());
|
||||
norm_landmark->set_y(landmark.y() / options_.input_image_height());
|
||||
// Scale Z like X, and allow additional uniform normalization.
|
||||
norm_landmark->set_z(landmark.z() / options_.input_image_width() /
|
||||
options_.normalize_z());
|
||||
norm_landmark->set_visibility(landmark.visibility());
|
||||
norm_landmark->set_presence(landmark.presence());
|
||||
}
|
||||
cc->Outputs()
|
||||
.Tag("NORM_LANDMARKS")
|
||||
.AddPacket(MakePacket<NormalizedLandmarkList>(output_norm_landmarks)
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
|
||||
// Output absolute landmarks.
|
||||
if (cc->Outputs().HasTag("LANDMARKS")) {
|
||||
cc->Outputs()
|
||||
.Tag("LANDMARKS")
|
||||
.AddPacket(MakePacket<LandmarkList>(output_landmarks)
|
||||
.At(cc->InputTimestamp()));
|
||||
}
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status TensorsToLandmarksCalculator::LoadOptions(
|
||||
CalculatorContext* cc) {
|
||||
// Get calculator options specified in the graph.
|
||||
options_ = cc->Options<::mediapipe::TensorsToLandmarksCalculatorOptions>();
|
||||
RET_CHECK(options_.has_num_landmarks());
|
||||
num_landmarks_ = options_.num_landmarks();
|
||||
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
} // namespace mediapipe
|
|
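The per-landmark math above reduces to a couple of arithmetic rules: optional mirroring in absolute pixel space, then division by the image dimensions, with Z scaled like X and further divided by normalize_z. A standalone restatement under those same assumptions:

struct Point3 { float x, y, z; };

// Mirrors the flip and normalization steps from Process() above.
Point3 NormalizeLandmark(Point3 p, int image_width, int image_height,
                         bool flip_horizontally, bool flip_vertically,
                         float normalize_z) {
  if (flip_horizontally) p.x = image_width - p.x;
  if (flip_vertically) p.y = image_height - p.y;
  // x and y are normalized by their own dimension; z is scaled like x and
  // divided by the extra normalize_z factor.
  return {p.x / image_width, p.y / image_height,
          p.z / image_width / normalize_z};
}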
54
mediapipe/calculators/tensor/tensors_to_landmarks_calculator.proto
Normal file
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// The option proto for the TensorsToLandmarksCalculator.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
|
||||
message TensorsToLandmarksCalculatorOptions {
|
||||
extend .mediapipe.CalculatorOptions {
|
||||
optional TensorsToLandmarksCalculatorOptions ext = 335742640;
|
||||
}
|
||||
|
||||
// [Required] Number of landmarks from the output of the model.
|
||||
optional int32 num_landmarks = 1;
|
||||
|
||||
// Size of the input image for the model. These options are used only when
|
||||
// normalized landmarks are needed. Z coordinate is scaled as X assuming
|
||||
// a weak perspective projection camera model.
|
||||
optional int32 input_image_width = 2;
|
||||
optional int32 input_image_height = 3;
|
||||
|
||||
// Whether the detection coordinates from the input tensors should be flipped
|
||||
// vertically (along the y-direction). This is useful, for example, when the
|
||||
// input tensors represent detections defined with a coordinate system where
|
||||
// the origin is at the top-left corner, whereas the desired detection
|
||||
// representation has a bottom-left origin (e.g., in OpenGL).
|
||||
optional bool flip_vertically = 4 [default = false];
|
||||
|
||||
// Whether the detection coordinates from the input tensors should be flipped
|
||||
// horizontally (along the x-direction). This is useful, for example, when the
|
||||
// input image is horizontally flipped in ImageTransformationCalculator
|
||||
// beforehand.
|
||||
optional bool flip_horizontally = 6 [default = false];
|
||||
|
||||
// A value that Z coordinates should be divided by. This option is used only
|
||||
// when normalized landmarks are needed. It is applied in addition to Z
|
||||
// coordinate being re-scaled as X.
|
||||
optional float normalize_z = 5 [default = 1.0];
|
||||
}
|
BIN
mediapipe/calculators/tensor/testdata/add.bin
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/input.jpg
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/large_sub_rect.png
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png
vendored
Normal file
BIN
mediapipe/calculators/tensor/testdata/image_to_tensor/noop_except_range.png
vendored
Normal file
3
mediapipe/calculators/tensor/testdata/labelmap.txt
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
classA
|
||||
classB
|
||||
classC
|
|
@ -84,7 +84,7 @@ namespace mpms = ::mediapipe::mediasequence;
|
|||
// node {
|
||||
// calculator: "UnpackMediaSequenceCalculator"
|
||||
// input_side_packet: "SEQUENCE_EXAMPLE:example_input_side_packet"
|
||||
// input_side_packet: "ROOT_DIRECTORY:path_to_dataset_root_directory"
|
||||
// input_side_packet: "DATASET_ROOT:path_to_dataset_root_directory"
|
||||
// output_side_packet: "DATA_PATH:full_path_to_data_element"
|
||||
// output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
|
||||
// options {
|
||||
|
|
|
@ -404,12 +404,7 @@ bool ShouldUseGpu(CC* cc) {
|
|||
MP_RETURN_IF_ERROR(LoadDelegate(cc));
|
||||
#endif
|
||||
} else {
|
||||
// TODO: why only on these platforms?
|
||||
// It seems that the XNNPACK delegate fails to load on Linux.
|
||||
#if defined(__EMSCRIPTEN__) || defined(MEDIAPIPE_ANDROID) || \
|
||||
defined(MEDIAPIPE_IOS)
|
||||
MP_RETURN_IF_ERROR(LoadDelegate(cc));
|
||||
#endif // __EMSCRIPTEN__ || MEDIAPIPE_ANDROID || MEDIAPIPE_IOS
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
|
|
@ -929,6 +929,7 @@ cc_library(
|
|||
deps = [
|
||||
":collection_has_min_size_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:classification_cc_proto",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
|
@ -1043,3 +1044,26 @@ cc_library(
|
|||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "logic_calculator_proto",
|
||||
srcs = ["logic_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "logic_calculator",
|
||||
srcs = ["logic_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":logic_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
|
|
@ -17,18 +17,24 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/formats/classification.pb.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
typedef CollectionHasMinSizeCalculator<std::vector<::mediapipe::NormalizedRect>>
|
||||
typedef CollectionHasMinSizeCalculator<std::vector<mediapipe::NormalizedRect>>
|
||||
NormalizedRectVectorHasMinSizeCalculator;
|
||||
REGISTER_CALCULATOR(NormalizedRectVectorHasMinSizeCalculator);
|
||||
|
||||
typedef CollectionHasMinSizeCalculator<
|
||||
std::vector<::mediapipe::NormalizedLandmarkList>>
|
||||
std::vector<mediapipe::NormalizedLandmarkList>>
|
||||
NormalizedLandmarkListVectorHasMinSizeCalculator;
|
||||
REGISTER_CALCULATOR(NormalizedLandmarkListVectorHasMinSizeCalculator);
|
||||
|
||||
typedef CollectionHasMinSizeCalculator<
|
||||
std::vector<mediapipe::ClassificationList>>
|
||||
ClassificationListVectorHasMinSizeCalculator;
|
||||
REGISTER_CALCULATOR(ClassificationListVectorHasMinSizeCalculator);
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "mediapipe/calculators/util/detections_to_rects_calculator.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
|
@ -36,19 +37,70 @@ constexpr char kNormRectTag[] = "NORM_RECT";
|
|||
constexpr char kRectsTag[] = "RECTS";
|
||||
constexpr char kNormRectsTag[] = "NORM_RECTS";
|
||||
|
||||
constexpr float kMinFloat = std::numeric_limits<float>::lowest();
|
||||
constexpr float kMaxFloat = std::numeric_limits<float>::max();
|
||||
|
||||
::mediapipe::Status NormRectFromKeyPoints(const LocationData& location_data,
|
||||
NormalizedRect* rect) {
|
||||
RET_CHECK_GT(location_data.relative_keypoints_size(), 1)
|
||||
<< "2 or more key points required to calculate a rect.";
|
||||
float xmin = kMaxFloat;
|
||||
float ymin = kMaxFloat;
|
||||
float xmax = kMinFloat;
|
||||
float ymax = kMinFloat;
|
||||
for (int i = 0; i < location_data.relative_keypoints_size(); ++i) {
|
||||
const auto& kp = location_data.relative_keypoints(i);
|
||||
xmin = std::min(xmin, kp.x());
|
||||
ymin = std::min(ymin, kp.y());
|
||||
xmax = std::max(xmax, kp.x());
|
||||
ymax = std::max(ymax, kp.y());
|
||||
}
|
||||
rect->set_x_center((xmin + xmax) / 2);
|
||||
rect->set_y_center((ymin + ymax) / 2);
|
||||
rect->set_width(xmax - xmin);
|
||||
rect->set_height(ymax - ymin);
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
template <class B, class R>
|
||||
void RectFromBox(B box, R* rect) {
|
||||
rect->set_x_center(box.xmin() + box.width() / 2);
|
||||
rect->set_y_center(box.ymin() + box.height() / 2);
|
||||
rect->set_width(box.width());
|
||||
rect->set_height(box.height());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
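NormRectFromKeyPoints above is simply the axis-aligned bounding box of the keypoints, reported as center plus size; e.g. keypoints (0.25, 0.25) and (0.75, 0.75) yield center (0.5, 0.5) and size (0.5, 0.5), exactly what the tests later in this diff assert. A standalone restatement:

#include <algorithm>
#include <utility>
#include <vector>

struct CenterSizeRect { float x_center, y_center, width, height; };

// Axis-aligned bounding box of the keypoints, as center + size. Assumes at
// least one keypoint; the calculator itself requires two or more.
CenterSizeRect BoundingRect(const std::vector<std::pair<float, float>>& kps) {
  float xmin = kps[0].first, xmax = kps[0].first;
  float ymin = kps[0].second, ymax = kps[0].second;
  for (const auto& kp : kps) {
    xmin = std::min(xmin, kp.first);
    xmax = std::max(xmax, kp.first);
    ymin = std::min(ymin, kp.second);
    ymax = std::max(ymax, kp.second);
  }
  return {(xmin + xmax) / 2, (ymin + ymax) / 2, xmax - xmin, ymax - ymin};
}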
::mediapipe::Status DetectionsToRectsCalculator::DetectionToRect(
|
||||
const Detection& detection, const DetectionSpec& detection_spec,
|
||||
Rect* rect) {
|
||||
const LocationData location_data = detection.location_data();
|
||||
RET_CHECK(location_data.format() == LocationData::BOUNDING_BOX)
|
||||
<< "Only Detection with formats of BOUNDING_BOX can be converted to Rect";
|
||||
const LocationData::BoundingBox bounding_box = location_data.bounding_box();
|
||||
rect->set_x_center(bounding_box.xmin() + bounding_box.width() / 2);
|
||||
rect->set_y_center(bounding_box.ymin() + bounding_box.height() / 2);
|
||||
rect->set_width(bounding_box.width());
|
||||
rect->set_height(bounding_box.height());
|
||||
switch (options_.conversion_mode()) {
|
||||
case mediapipe::DetectionsToRectsCalculatorOptions_ConversionMode_DEFAULT:
|
||||
case mediapipe::
|
||||
DetectionsToRectsCalculatorOptions_ConversionMode_USE_BOUNDING_BOX: {
|
||||
RET_CHECK(location_data.format() == LocationData::BOUNDING_BOX)
|
||||
<< "Only Detection with formats of BOUNDING_BOX can be converted to "
|
||||
"Rect";
|
||||
RectFromBox(location_data.bounding_box(), rect);
|
||||
break;
|
||||
}
|
||||
case mediapipe::
|
||||
DetectionsToRectsCalculatorOptions_ConversionMode_USE_KEYPOINTS: {
|
||||
RET_CHECK(detection_spec.image_size.has_value())
|
||||
<< "Rect with absolute coordinates calculation requires image size.";
|
||||
const int width = detection_spec.image_size->first;
|
||||
const int height = detection_spec.image_size->second;
|
||||
NormalizedRect norm_rect;
|
||||
MP_RETURN_IF_ERROR(NormRectFromKeyPoints(location_data, &norm_rect));
|
||||
rect->set_x_center(std::round(norm_rect.x_center() * width));
|
||||
rect->set_y_center(std::round(norm_rect.y_center() * height));
|
||||
rect->set_width(std::round(norm_rect.width() * width));
|
||||
rect->set_height(std::round(norm_rect.height() * height));
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -56,15 +108,22 @@ constexpr char kNormRectsTag[] = "NORM_RECTS";
|
|||
const Detection& detection, const DetectionSpec& detection_spec,
|
||||
NormalizedRect* rect) {
|
||||
const LocationData location_data = detection.location_data();
|
||||
RET_CHECK(location_data.format() == LocationData::RELATIVE_BOUNDING_BOX)
|
||||
<< "Only Detection with formats of RELATIVE_BOUNDING_BOX can be "
|
||||
"converted to NormalizedRect";
|
||||
const LocationData::RelativeBoundingBox bounding_box =
|
||||
location_data.relative_bounding_box();
|
||||
rect->set_x_center(bounding_box.xmin() + bounding_box.width() / 2);
|
||||
rect->set_y_center(bounding_box.ymin() + bounding_box.height() / 2);
|
||||
rect->set_width(bounding_box.width());
|
||||
rect->set_height(bounding_box.height());
|
||||
switch (options_.conversion_mode()) {
|
||||
case mediapipe::DetectionsToRectsCalculatorOptions_ConversionMode_DEFAULT:
|
||||
case mediapipe::
|
||||
DetectionsToRectsCalculatorOptions_ConversionMode_USE_BOUNDING_BOX: {
|
||||
RET_CHECK(location_data.format() == LocationData::RELATIVE_BOUNDING_BOX)
|
||||
<< "Only Detection with formats of RELATIVE_BOUNDING_BOX can be "
|
||||
"converted to NormalizedRect";
|
||||
RectFromBox(location_data.relative_bounding_box(), rect);
|
||||
break;
|
||||
}
|
||||
case mediapipe::
|
||||
DetectionsToRectsCalculatorOptions_ConversionMode_USE_KEYPOINTS: {
|
||||
MP_RETURN_IF_ERROR(NormRectFromKeyPoints(location_data, rect));
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
|
|
|
@ -35,4 +35,12 @@ message DetectionsToRectsCalculatorOptions {
|
|||
// Whether to output a zero-rect (with origin and size both zero) when the
|
||||
// input detection vector is empty.
|
||||
optional bool output_zero_rect_for_empty_detections = 5;
|
||||
|
||||
enum ConversionMode {
|
||||
DEFAULT = 0;
|
||||
USE_BOUNDING_BOX = 1;
|
||||
USE_KEYPOINTS = 2;
|
||||
}
|
||||
|
||||
optional ConversionMode conversion_mode = 6;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_runner.h"
|
||||
|
@ -26,6 +30,21 @@
|
|||
#include "mediapipe/framework/port/status_matchers.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
MATCHER_P4(RectEq, x_center, y_center, width, height, "") {
|
||||
return testing::Value(arg.x_center(), testing::Eq(x_center)) &&
|
||||
testing::Value(arg.y_center(), testing::Eq(y_center)) &&
|
||||
testing::Value(arg.width(), testing::Eq(width)) &&
|
||||
testing::Value(arg.height(), testing::Eq(height));
|
||||
}
|
||||
|
||||
MATCHER_P4(NormRectEq, x_center, y_center, width, height, "") {
|
||||
return testing::Value(arg.x_center(), testing::FloatEq(x_center)) &&
|
||||
testing::Value(arg.y_center(), testing::FloatEq(y_center)) &&
|
||||
testing::Value(arg.width(), testing::FloatEq(width)) &&
|
||||
testing::Value(arg.height(), testing::FloatEq(height));
|
||||
}
|
||||
|
||||
Detection DetectionWithLocationData(int32 xmin, int32 ymin, int32 width,
|
||||
int32 height) {
|
||||
|
@ -39,6 +58,19 @@ Detection DetectionWithLocationData(int32 xmin, int32 ymin, int32 width,
|
|||
return detection;
|
||||
}
|
||||
|
||||
Detection DetectionWithKeyPoints(
|
||||
const std::vector<std::pair<float, float>>& key_points) {
|
||||
Detection detection;
|
||||
LocationData* location_data = detection.mutable_location_data();
|
||||
std::for_each(key_points.begin(), key_points.end(),
|
||||
[location_data](std::pair<float, float> kp) {
|
||||
auto* new_kp = location_data->add_relative_keypoints();
|
||||
new_kp->set_x(kp.first);
|
||||
new_kp->set_y(kp.second);
|
||||
});
|
||||
return detection;
|
||||
}
|
||||
|
||||
Detection DetectionWithRelativeLocationData(double xmin, double ymin,
|
||||
double width, double height) {
|
||||
Detection detection;
|
||||
|
@ -70,10 +102,61 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToRect) {
|
|||
const std::vector<Packet>& output = runner.Outputs().Tag("RECT").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rect = output[0].Get<Rect>();
|
||||
EXPECT_EQ(rect.width(), 300);
|
||||
EXPECT_EQ(rect.height(), 400);
|
||||
EXPECT_EQ(rect.x_center(), 250);
|
||||
EXPECT_EQ(rect.y_center(), 400);
|
||||
EXPECT_THAT(rect, RectEq(250, 400, 300, 400));
|
||||
}
|
||||
|
||||
::mediapipe::StatusOr<Rect> RunDetectionKeyPointsToRectCalculation(
|
||||
Detection detection, std::pair<int, int> image_size) {
|
||||
CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "RECT:rect"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
conversion_mode: USE_KEYPOINTS
|
||||
}
|
||||
}
|
||||
)"));
|
||||
|
||||
runner.MutableInputs()
|
||||
->Tag("DETECTION")
|
||||
.packets.push_back(MakePacket<Detection>(std::move(detection))
|
||||
.At(Timestamp::PostStream()));
|
||||
runner.MutableInputs()
|
||||
->Tag("IMAGE_SIZE")
|
||||
.packets.push_back(MakePacket<std::pair<int, int>>(image_size)
|
||||
.At(Timestamp::PostStream()));
|
||||
|
||||
MP_RETURN_IF_ERROR(runner.Run());
|
||||
const std::vector<Packet>& output = runner.Outputs().Tag("RECT").packets;
|
||||
RET_CHECK_EQ(output.size(), 1);
|
||||
return output[0].Get<Rect>();
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionKeyPointsToRect) {
|
||||
auto status_or_value = RunDetectionKeyPointsToRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints({{0.0f, 0.0f}, {1.0f, 1.0f}}),
|
||||
/*image_size=*/{640, 480});
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(320, 240, 640, 480));
|
||||
|
||||
status_or_value = RunDetectionKeyPointsToRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints({{0.25f, 0.25f}, {0.75f, 0.75f}}),
|
||||
/*image_size=*/{640, 480});
|
||||
MP_ASSERT_OK(status_or_value);
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(320, 240, 320, 240));
|
||||
|
||||
status_or_value = RunDetectionKeyPointsToRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints({{0.0f, 0.0f}, {0.5f, 0.5f}}),
|
||||
/*image_size=*/{640, 480});
|
||||
MP_ASSERT_OK(status_or_value);
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(160, 120, 320, 240));
|
||||
|
||||
status_or_value = RunDetectionKeyPointsToRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints({{0.5f, 0.5f}, {1.0f, 1.0f}}),
|
||||
/*image_size=*/{640, 480});
|
||||
MP_ASSERT_OK(status_or_value);
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(480, 360, 320, 240));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRect) {
|
||||
|
@ -95,10 +178,56 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRect) {
|
|||
const std::vector<Packet>& output = runner.Outputs().Tag("NORM_RECT").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rect = output[0].Get<NormalizedRect>();
|
||||
EXPECT_FLOAT_EQ(rect.width(), 0.3);
|
||||
EXPECT_FLOAT_EQ(rect.height(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rect.x_center(), 0.25);
|
||||
EXPECT_FLOAT_EQ(rect.y_center(), 0.4);
|
||||
EXPECT_THAT(rect, NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
|
||||
}
|
||||
|
||||
::mediapipe::StatusOr<NormalizedRect>
|
||||
RunDetectionKeyPointsToNormRectCalculation(Detection detection) {
|
||||
CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:detection"
|
||||
output_stream: "NORM_RECT:rect"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
conversion_mode: USE_KEYPOINTS
|
||||
}
|
||||
}
|
||||
)"));
|
||||
|
||||
runner.MutableInputs()
|
||||
->Tag("DETECTION")
|
||||
.packets.push_back(MakePacket<Detection>(std::move(detection))
|
||||
.At(Timestamp::PostStream()));
|
||||
|
||||
MP_RETURN_IF_ERROR(runner.Run());
|
||||
const std::vector<Packet>& output = runner.Outputs().Tag("NORM_RECT").packets;
|
||||
RET_CHECK_EQ(output.size(), 1);
|
||||
return output[0].Get<NormalizedRect>();
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionKeyPointsToNormalizedRect) {
|
||||
NormalizedRect rect;
|
||||
|
||||
auto status_or_value = RunDetectionKeyPointsToNormRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints(
|
||||
{{0.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 1.0f}}));
|
||||
MP_ASSERT_OK(status_or_value);
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.5f, 0.5f, 1.0f, 1.0f));
|
||||
|
||||
status_or_value = RunDetectionKeyPointsToNormRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints(
|
||||
{{0.25f, 0.25f}, {0.75f, 0.25f}, {0.75f, 0.75f}}));
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.5f, 0.5f, 0.5f, 0.5f));
|
||||
|
||||
status_or_value = RunDetectionKeyPointsToNormRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints({{0.0f, 0.0f}, {0.5f, 0.5f}}));
|
||||
MP_ASSERT_OK(status_or_value);
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.25f, 0.25f, 0.5f, 0.5f));
|
||||
|
||||
status_or_value = RunDetectionKeyPointsToNormRectCalculation(
|
||||
/*detection=*/DetectionWithKeyPoints({{0.5f, 0.5f}, {1.0f, 1.0f}}));
|
||||
MP_ASSERT_OK(status_or_value);
|
||||
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.75f, 0.75f, 0.5f, 0.5f));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionsToRect) {
|
||||
|
@ -121,10 +250,7 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToRect) {
|
|||
const std::vector<Packet>& output = runner.Outputs().Tag("RECT").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rect = output[0].Get<Rect>();
|
||||
EXPECT_EQ(rect.width(), 300);
|
||||
EXPECT_EQ(rect.height(), 400);
|
||||
EXPECT_EQ(rect.x_center(), 250);
|
||||
EXPECT_EQ(rect.y_center(), 400);
|
||||
EXPECT_THAT(rect, RectEq(250, 400, 300, 400));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRect) {
|
||||
|
@ -147,10 +273,7 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRect) {
|
|||
const std::vector<Packet>& output = runner.Outputs().Tag("NORM_RECT").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rect = output[0].Get<NormalizedRect>();
|
||||
EXPECT_FLOAT_EQ(rect.width(), 0.3);
|
||||
EXPECT_FLOAT_EQ(rect.height(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rect.x_center(), 0.25);
|
||||
EXPECT_FLOAT_EQ(rect.y_center(), 0.4);
|
||||
EXPECT_THAT(rect, NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionsToRects) {
|
||||
|
@ -173,15 +296,9 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToRects) {
|
|||
const std::vector<Packet>& output = runner.Outputs().Tag("RECTS").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rects = output[0].Get<std::vector<Rect>>();
|
||||
EXPECT_EQ(rects.size(), 2);
|
||||
EXPECT_EQ(rects[0].width(), 300);
|
||||
EXPECT_EQ(rects[0].height(), 400);
|
||||
EXPECT_EQ(rects[0].x_center(), 250);
|
||||
EXPECT_EQ(rects[0].y_center(), 400);
|
||||
EXPECT_EQ(rects[1].width(), 400);
|
||||
EXPECT_EQ(rects[1].height(), 500);
|
||||
EXPECT_EQ(rects[1].x_center(), 400);
|
||||
EXPECT_EQ(rects[1].y_center(), 550);
|
||||
ASSERT_EQ(rects.size(), 2);
|
||||
EXPECT_THAT(rects[0], RectEq(250, 400, 300, 400));
|
||||
EXPECT_THAT(rects[1], RectEq(400, 550, 400, 500));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRects) {
|
||||
|
@ -205,15 +322,9 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRects) {
|
|||
runner.Outputs().Tag("NORM_RECTS").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rects = output[0].Get<std::vector<NormalizedRect>>();
|
||||
EXPECT_EQ(rects.size(), 2);
|
||||
EXPECT_FLOAT_EQ(rects[0].width(), 0.3);
|
||||
EXPECT_FLOAT_EQ(rects[0].height(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rects[0].x_center(), 0.25);
|
||||
EXPECT_FLOAT_EQ(rects[0].y_center(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rects[1].width(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rects[1].height(), 0.5);
|
||||
EXPECT_FLOAT_EQ(rects[1].x_center(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rects[1].y_center(), 0.55);
|
||||
ASSERT_EQ(rects.size(), 2);
|
||||
EXPECT_THAT(rects[0], NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
|
||||
EXPECT_THAT(rects[1], NormRectEq(0.4f, 0.55f, 0.4f, 0.5f));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionToRects) {
|
||||
|
@ -236,10 +347,7 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToRects) {
|
|||
ASSERT_EQ(1, output.size());
|
||||
const auto& rects = output[0].Get<std::vector<Rect>>();
|
||||
EXPECT_EQ(rects.size(), 1);
|
||||
EXPECT_EQ(rects[0].width(), 300);
|
||||
EXPECT_EQ(rects[0].height(), 400);
|
||||
EXPECT_EQ(rects[0].x_center(), 250);
|
||||
EXPECT_EQ(rects[0].y_center(), 400);
|
||||
EXPECT_THAT(rects[0], RectEq(250, 400, 300, 400));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRects) {
|
||||
|
@ -262,11 +370,8 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRects) {
|
|||
runner.Outputs().Tag("NORM_RECTS").packets;
|
||||
ASSERT_EQ(1, output.size());
|
||||
const auto& rects = output[0].Get<std::vector<NormalizedRect>>();
|
||||
EXPECT_EQ(rects.size(), 1);
|
||||
EXPECT_FLOAT_EQ(rects[0].width(), 0.3);
|
||||
EXPECT_FLOAT_EQ(rects[0].height(), 0.4);
|
||||
EXPECT_FLOAT_EQ(rects[0].x_center(), 0.25);
|
||||
EXPECT_FLOAT_EQ(rects[0].y_center(), 0.4);
|
||||
ASSERT_EQ(rects.size(), 1);
|
||||
EXPECT_THAT(rects[0], NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
|
||||
}
|
||||
|
||||
TEST(DetectionsToRectsCalculatorTest, WrongInputToRect) {
|
||||
|
@ -309,4 +414,5 @@ TEST(DetectionsToRectsCalculatorTest, WrongInputToNormalizedRect) {
|
|||
"Only Detection with formats of RELATIVE_BOUNDING_BOX"));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
|
|
105
mediapipe/calculators/util/logic_calculator.cc
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "mediapipe/calculators/util/logic_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
|
||||
namespace mediapipe {
|
||||
using mediapipe::LogicCalculatorOptions;
|
||||
|
||||
// A calculator to compute logical functions of bool inputs.
|
||||
// With just one input, the output equals the input as expected.
|
||||
//
|
||||
// Inputs: One or more bool inputs, which may be input-stream-packets,
|
||||
// input-side-packets, or options input-values.
|
||||
//
|
||||
// Outputs: One bool stream.
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "LogicCalculator"
|
||||
// input_stream: "has_data"
|
||||
// input_side_packet: "enable"
|
||||
// input_stream: "is_valid"
|
||||
// output_stream: "process_data"
|
||||
// options {
|
||||
// [mediapipe.LogicCalculatorOptions.ext] {
|
||||
// op: AND
|
||||
// input_value: true
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class LogicCalculator : public CalculatorBase {
|
||||
public:
|
||||
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
|
||||
for (int k = 0; k < cc->Inputs().NumEntries(""); ++k) {
|
||||
cc->Inputs().Index(k).Set<bool>();
|
||||
}
|
||||
for (int k = 0; k < cc->InputSidePackets().NumEntries(""); ++k) {
|
||||
cc->InputSidePackets().Index(k).Set<bool>();
|
||||
}
|
||||
RET_CHECK_GE(cc->Inputs().NumEntries("") +
|
||||
cc->InputSidePackets().NumEntries("") +
|
||||
cc->Options<LogicCalculatorOptions>().input_value_size(),
|
||||
1);
|
||||
RET_CHECK_EQ(cc->Outputs().NumEntries(""), 1);
|
||||
cc->Outputs().Index(0).Set<bool>();
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
::mediapipe::Status Open(CalculatorContext* cc) override {
|
||||
options_ = cc->Options<LogicCalculatorOptions>();
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
bool LogicalOp(bool b1, bool b2) {
|
||||
switch (options_.op()) {
|
||||
case LogicCalculatorOptions::AND:
|
||||
return b1 && b2;
|
||||
case LogicCalculatorOptions::OR:
|
||||
return b1 || b2;
|
||||
case LogicCalculatorOptions::XOR:
|
||||
return b1 ^ b2;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
::mediapipe::Status Process(CalculatorContext* cc) override {
|
||||
bool result = options_.op() == LogicCalculatorOptions::AND;
|
||||
for (int k = 0; k < options_.input_value_size(); ++k) {
|
||||
result = LogicalOp(result, options_.input_value(k));
|
||||
}
|
||||
for (int k = 0; k < cc->Inputs().NumEntries(""); ++k) {
|
||||
result = LogicalOp(result, cc->Inputs().Index(k).Value().Get<bool>());
|
||||
}
|
||||
for (int k = 0; k < cc->InputSidePackets().NumEntries(""); ++k) {
|
||||
result = LogicalOp(result, cc->InputSidePackets().Index(k).Get<bool>());
|
||||
}
|
||||
if (options_.negate()) {
|
||||
result = !result;
|
||||
}
|
||||
cc->Outputs().Index(0).Add(new bool(result), cc->InputTimestamp());
|
||||
return ::mediapipe::OkStatus();
|
||||
}
|
||||
|
||||
private:
|
||||
LogicCalculatorOptions options_;
|
||||
};
|
||||
REGISTER_CALCULATOR(LogicCalculator);
|
||||
|
||||
} // namespace mediapipe
|
|
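Process() above folds every configured value, input packet, and side packet with the selected operation, seeded with that operation's identity element: true for AND, false for OR and XOR. That seeding is why a single input passes through unchanged, and why XOR over many inputs computes their parity. A standalone restatement of the fold:

#include <vector>

enum class Op { kAnd, kOr, kXor };

bool Combine(Op op, bool b1, bool b2) {
  switch (op) {
    case Op::kAnd:
      return b1 && b2;
    case Op::kOr:
      return b1 || b2;
    case Op::kXor:
      return b1 != b2;
  }
  return false;
}

// Fold all inputs, starting from the identity element of the operation.
bool Evaluate(Op op, const std::vector<bool>& inputs, bool negate) {
  bool result = (op == Op::kAnd);
  for (bool b : inputs) result = Combine(op, result, b);
  return negate ? !result : result;
}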
@ -1,4 +1,4 @@
|
|||
// Copyright 2019 The MediaPipe Authors.
|
||||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -12,10 +12,27 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#import <UIKit/UIKit.h>
|
||||
syntax = "proto2";
|
||||
|
||||
#import "mediapipe/examples/ios/common/CommonViewController.h"
|
||||
package mediapipe;
|
||||
|
||||
@interface MultiHandTrackingViewController : CommonViewController
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
|
||||
@end
|
||||
message LogicCalculatorOptions {
|
||||
extend CalculatorOptions {
|
||||
optional LogicCalculatorOptions ext = 338731246;
|
||||
}
|
||||
// The logical operation to apply.
|
||||
enum Operation {
|
||||
AND = 0;
|
||||
OR = 1;
|
||||
XOR = 2;
|
||||
}
|
||||
optional Operation op = 1;
|
||||
|
||||
// Whether to negate the result.
|
||||
optional bool negate = 2;
|
||||
|
||||
// Optional bool input values.
|
||||
repeated bool input_value = 3;
|
||||
}
|
|
@ -36,9 +36,8 @@ android_binary(
|
|||
name = "facedetectioncpu",
|
||||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/face_detection:mobile_cpu.binarypb",
|
||||
"//mediapipe/models:face_detection_front.tflite",
|
||||
"//mediapipe/models:face_detection_front_labelmap.txt",
|
||||
"//mediapipe/graphs/face_detection:face_detection_mobile_cpu.binarypb",
|
||||
"//mediapipe/modules/face_detection:face_detection_front.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
@ -47,7 +46,7 @@ android_binary(
|
|||
"appName": "Face Detection (CPU)",
|
||||
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
|
||||
"cameraFacingFront": "True",
|
||||
"binaryGraphName": "mobile_cpu.binarypb",
|
||||
"binaryGraphName": "face_detection_mobile_cpu.binarypb",
|
||||
"inputVideoStreamName": "input_video",
|
||||
"outputVideoStreamName": "output_video",
|
||||
"flipFramesVertically": "True",
|
||||
|
|
|
@ -36,9 +36,8 @@ android_binary(
|
|||
name = "facedetectiongpu",
|
||||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/face_detection:mobile_gpu.binarypb",
|
||||
"//mediapipe/models:face_detection_front.tflite",
|
||||
"//mediapipe/models:face_detection_front_labelmap.txt",
|
||||
"//mediapipe/graphs/face_detection:face_detection_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/face_detection:face_detection_front.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
@ -47,7 +46,7 @@ android_binary(
|
|||
"appName": "Face Detection",
|
||||
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
|
||||
"cameraFacingFront": "True",
|
||||
"binaryGraphName": "mobile_gpu.binarypb",
|
||||
"binaryGraphName": "face_detection_mobile_gpu.binarypb",
|
||||
"inputVideoStreamName": "input_video",
|
||||
"outputVideoStreamName": "output_video",
|
||||
"flipFramesVertically": "True",
|
||||
|
|
|
@ -37,8 +37,7 @@ android_binary(
|
|||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu.binarypb",
|
||||
"//mediapipe/models:palm_detection.tflite",
|
||||
"//mediapipe/models:palm_detection_labelmap.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
|
|
@ -37,10 +37,9 @@ android_binary(
|
|||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
|
||||
"//mediapipe/models:handedness.txt",
|
||||
"//mediapipe/models:hand_landmark.tflite",
|
||||
"//mediapipe/models:palm_detection.tflite",
|
||||
"//mediapipe/models:palm_detection_labelmap.txt",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
|
|

@ -18,76 +18,75 @@ import android.os.Bundle;
 import android.util.Log;
 import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
 import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
+import com.google.mediapipe.framework.AndroidPacketCreator;
+import com.google.mediapipe.framework.Packet;
 import com.google.mediapipe.framework.PacketGetter;
-import com.google.protobuf.InvalidProtocolBufferException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
 /** Main activity of MediaPipe hand tracking app. */
 public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
   private static final String TAG = "MainActivity";
 
-  private static final String OUTPUT_HAND_PRESENCE_STREAM_NAME = "hand_presence";
+  private static final String INPUT_NUM_HANDS_SIDE_PACKET_NAME = "num_hands";
   private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks";
+  // Max number of hands to detect/process.
+  private static final int NUM_HANDS = 2;
 
   @Override
   protected void onCreate(Bundle savedInstanceState) {
     super.onCreate(savedInstanceState);
 
-    processor.addPacketCallback(
-        OUTPUT_HAND_PRESENCE_STREAM_NAME,
-        (packet) -> {
-          Boolean handPresence = PacketGetter.getBool(packet);
-          if (!handPresence) {
-            Log.d(
-                TAG,
-                "[TS:" + packet.getTimestamp() + "] Hand presence is false, no hands detected.");
-          }
-        });
+    AndroidPacketCreator packetCreator = processor.getPacketCreator();
+    Map<String, Packet> inputSidePackets = new HashMap<>();
+    inputSidePackets.put(INPUT_NUM_HANDS_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_HANDS));
+    processor.setInputSidePackets(inputSidePackets);
 
     // To show verbose logging, run:
     // adb shell setprop log.tag.MainActivity VERBOSE
     if (Log.isLoggable(TAG, Log.VERBOSE)) {
       processor.addPacketCallback(
-          OUTPUT_LANDMARKS_STREAM_NAME,
-          (packet) -> {
-            byte[] landmarksRaw = PacketGetter.getProtoBytes(packet);
-            try {
-              NormalizedLandmarkList landmarks = NormalizedLandmarkList.parseFrom(landmarksRaw);
-              if (landmarks == null) {
-                Log.v(TAG, "[TS:" + packet.getTimestamp() + "] No hand landmarks.");
-                return;
-              }
-              // Note: If hand_presence is false, these landmarks are useless.
+          OUTPUT_LANDMARKS_STREAM_NAME,
+          (packet) -> {
+            Log.v(TAG, "Received multi-hand landmarks packet.");
+            List<NormalizedLandmarkList> multiHandLandmarks =
+                PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
             Log.v(
                 TAG,
                 "[TS:"
                     + packet.getTimestamp()
-                    + "] #Landmarks for hand: "
-                    + landmarks.getLandmarkCount());
-              Log.v(TAG, getLandmarksDebugString(landmarks));
-            } catch (InvalidProtocolBufferException e) {
-              Log.e(TAG, "Couldn't Exception received - " + e);
-              return;
-            }
-          });
+                    + "] "
+                    + getMultiHandLandmarksDebugString(multiHandLandmarks));
+          });
     }
   }
 
-  private static String getLandmarksDebugString(NormalizedLandmarkList landmarks) {
-    int landmarkIndex = 0;
-    String landmarksString = "";
-    for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
-      landmarksString +=
-          "\t\tLandmark["
-              + landmarkIndex
-              + "]: ("
-              + landmark.getX()
-              + ", "
-              + landmark.getY()
-              + ", "
-              + landmark.getZ()
-              + ")\n";
-      ++landmarkIndex;
+  private String getMultiHandLandmarksDebugString(List<NormalizedLandmarkList> multiHandLandmarks) {
+    if (multiHandLandmarks.isEmpty()) {
+      return "No hand landmarks";
     }
-    return landmarksString;
+    String multiHandLandmarksStr = "Number of hands detected: " + multiHandLandmarks.size() + "\n";
+    int handIndex = 0;
+    for (NormalizedLandmarkList landmarks : multiHandLandmarks) {
+      multiHandLandmarksStr +=
+          "\t#Hand landmarks for hand[" + handIndex + "]: " + landmarks.getLandmarkCount() + "\n";
+      int landmarkIndex = 0;
+      for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
+        multiHandLandmarksStr +=
+            "\t\tLandmark ["
+                + landmarkIndex
+                + "]: ("
+                + landmark.getX()
+                + ", "
+                + landmark.getY()
+                + ", "
+                + landmark.getZ()
+                + ")\n";
+        ++landmarkIndex;
+      }
+      ++handIndex;
    }
+    return multiHandLandmarksStr;
+  }
 }
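With this hunk, the hand-tracking example no longer parses a single `NormalizedLandmarkList` from raw proto bytes; all hands arrive in one packet and are unpacked with `PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser())`, while the hand count is configured through the `num_hands` input side packet instead of a separate `hand_presence` stream. A minimal sketch of consuming that same `List<NormalizedLandmarkList>` beyond debug logging follows; the helper class is hypothetical (not part of the change), and it assumes the hand landmark model's convention that landmark index 0 is the wrist with x/y normalized to [0, 1].

```java
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import java.util.List;

/** Hypothetical helper, not part of the change above. */
final class HandLandmarkSummaries {
  private HandLandmarkSummaries() {}

  /** Returns one "hand N: wrist=(x, y)" line per detected hand. */
  static String summarizeWrists(List<NormalizedLandmarkList> multiHandLandmarks) {
    StringBuilder sb = new StringBuilder();
    for (int hand = 0; hand < multiHandLandmarks.size(); ++hand) {
      NormalizedLandmarkList landmarks = multiHandLandmarks.get(hand);
      if (landmarks.getLandmarkCount() == 0) {
        continue; // nothing detected for this slot
      }
      // Assumption: index 0 is the wrist in the MediaPipe hand landmark model.
      NormalizedLandmark wrist = landmarks.getLandmark(0);
      sb.append("hand ").append(hand)
          .append(": wrist=(").append(wrist.getX())
          .append(", ").append(wrist.getY()).append(")\n");
    }
    return sb.toString();
  }
}
```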

@ -1,64 +0,0 @@
-# Copyright 2019 The MediaPipe Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-licenses(["notice"])
-
-package(default_visibility = ["//visibility:private"])
-
-cc_binary(
-    name = "libmediapipe_jni.so",
-    linkshared = 1,
-    linkstatic = 1,
-    deps = [
-        "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators",
-        "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
-    ],
-)
-
-cc_library(
-    name = "mediapipe_jni_lib",
-    srcs = [":libmediapipe_jni.so"],
-    alwayslink = 1,
-)
-
-android_binary(
-    name = "multihandtrackinggpu",
-    srcs = glob(["*.java"]),
-    assets = [
-        "//mediapipe/graphs/hand_tracking:multi_hand_tracking_mobile_gpu.binarypb",
-        "//mediapipe/models:handedness.txt",
-        "//mediapipe/models:hand_landmark.tflite",
-        "//mediapipe/models:palm_detection.tflite",
-        "//mediapipe/models:palm_detection_labelmap.txt",
-    ],
-    assets_dir = "",
-    manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
-    manifest_values = {
-        "applicationId": "com.google.mediapipe.apps.multihandtrackinggpu",
-        "appName": "Multi-hand Tracking",
-        "mainActivity": ".MainActivity",
-        "cameraFacingFront": "True",
-        "binaryGraphName": "multi_hand_tracking_mobile_gpu.binarypb",
-        "inputVideoStreamName": "input_video",
-        "outputVideoStreamName": "output_video",
-        "flipFramesVertically": "True",
-    },
-    multidex = "native",
-    deps = [
-        ":mediapipe_jni_lib",
-        "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
-        "//mediapipe/framework/formats:landmark_java_proto_lite",
-        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
-    ],
-)

@ -1,80 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.mediapipe.apps.multihandtrackinggpu;
-
-import android.os.Bundle;
-import android.util.Log;
-import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
-import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
-import com.google.mediapipe.framework.PacketGetter;
-import java.util.List;
-
-/** Main activity of MediaPipe multi-hand tracking app. */
-public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
-  private static final String TAG = "MainActivity";
-
-  private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_hand_landmarks";
-
-  @Override
-  protected void onCreate(Bundle savedInstanceState) {
-    super.onCreate(savedInstanceState);
-
-    // To show verbose logging, run:
-    // adb shell setprop log.tag.MainActivity VERBOSE
-    if (Log.isLoggable(TAG, Log.VERBOSE)) {
-      processor.addPacketCallback(
-          OUTPUT_LANDMARKS_STREAM_NAME,
-          (packet) -> {
-            Log.v(TAG, "Received multi-hand landmarks packet.");
-            List<NormalizedLandmarkList> multiHandLandmarks =
-                PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
-            Log.v(
-                TAG,
-                "[TS:"
-                    + packet.getTimestamp()
-                    + "] "
-                    + getMultiHandLandmarksDebugString(multiHandLandmarks));
-          });
-    }
-  }
-
-  private String getMultiHandLandmarksDebugString(List<NormalizedLandmarkList> multiHandLandmarks) {
-    if (multiHandLandmarks.isEmpty()) {
-      return "No hand landmarks";
-    }
-    String multiHandLandmarksStr = "Number of hands detected: " + multiHandLandmarks.size() + "\n";
-    int handIndex = 0;
-    for (NormalizedLandmarkList landmarks : multiHandLandmarks) {
-      multiHandLandmarksStr +=
-          "\t#Hand landmarks for hand[" + handIndex + "]: " + landmarks.getLandmarkCount() + "\n";
-      int landmarkIndex = 0;
-      for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
-        multiHandLandmarksStr +=
-            "\t\tLandmark ["
-                + landmarkIndex
-                + "]: ("
-                + landmark.getX()
-                + ", "
-                + landmark.getY()
-                + ", "
-                + landmark.getZ()
-                + ")\n";
-        ++landmarkIndex;
-      }
-      ++handIndex;
-    }
-    return multiHandLandmarksStr;
-  }
-}

@ -1,4 +1,4 @@
-# Copyright 2019 The MediaPipe Authors.
+# Copyright 2020 The MediaPipe Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@ -12,16 +12,64 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load(":build_defs.bzl", "generate_manifest_values")
+
 licenses(["notice"])
 
 package(default_visibility = ["//visibility:private"])
 
+config_setting(
+    name = "use_chair",
+    define_values = {
+        "chair": "true",
+    },
+)
+
+config_setting(
+    name = "use_cup",
+    define_values = {
+        "cup": "true",
+    },
+)
+
+config_setting(
+    name = "use_camera",
+    define_values = {
+        "camera": "true",
+    },
+)
+
+config_setting(
+    name = "use_shoe_1stage",
+    define_values = {
+        "shoe_1stage": "true",
+    },
+)
+
+config_setting(
+    name = "use_chair_1stage",
+    define_values = {
+        "chair_1stage": "true",
+    },
+)
+
+selects.config_setting_group(
+    name = "1stage",
+    match_any = [
+        ":use_shoe_1stage",
+        ":use_chair_1stage",
+    ],
+)
+
 cc_binary(
     name = "libmediapipe_jni.so",
     linkshared = 1,
     linkstatic = 1,
-    deps = [
-        "//mediapipe/graphs/object_detection_3d:mobile_calculators",
+    deps = select({
+        "//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_calculators"],
+        ":1stage": ["//mediapipe/graphs/object_detection_3d:mobile_calculators_1stage"],
+    }) + [
         "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
     ],
 )

@ -32,67 +80,108 @@ cc_library(
     alwayslink = 1,
 )
 
-# To use the "chair" model instead of the default "shoes" model,
-# add "--define chair=true" to the bazel build command.
-config_setting(
-    name = "use_chair_model",
-    define_values = {
-        "chair": "true",
-    },
-)
-
 genrule(
     name = "binary_graph",
     srcs = select({
-        "//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph_shoe"],
-        ":use_chair_model": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph_chair"],
+        "//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph"],
+        ":1stage": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_1stage_binary_graph"],
     }),
     outs = ["object_detection_3d.binarypb"],
     cmd = "cp $< $@",
 )
 
+MODELS_DIR = "//mediapipe/models"
+
 genrule(
     name = "model",
     srcs = select({
-        "//conditions:default": ["//mediapipe/models:object_detection_3d_sneakers.tflite"],
-        ":use_chair_model": ["//mediapipe/models:object_detection_3d_chair.tflite"],
+        "//conditions:default": [MODELS_DIR + ":object_detection_3d_sneakers.tflite"],
+        ":use_chair": [MODELS_DIR + ":object_detection_3d_chair.tflite"],
+        ":use_cup": [MODELS_DIR + ":object_detection_3d_cup.tflite"],
+        ":use_camera": [MODELS_DIR + ":object_detection_3d_camera.tflite"],
+        ":use_shoe_1stage": [MODELS_DIR + ":object_detection_3d_sneakers_1stage.tflite"],
+        ":use_chair_1stage": [MODELS_DIR + ":object_detection_3d_chair_1stage.tflite"],
     }),
     outs = ["object_detection_3d.tflite"],
     cmd = "cp $< $@",
 )
 
+MANIFESTS_DIR = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/manifests"
+
+android_library(
+    name = "manifest_lib",
+    exports_manifest = 1,
+    manifest = select({
+        "//conditions:default": MANIFESTS_DIR + ":AndroidManifestSneaker.xml",
+        ":use_chair": MANIFESTS_DIR + ":AndroidManifestChair.xml",
+        ":use_cup": MANIFESTS_DIR + ":AndroidManifestCup.xml",
+        ":use_camera": MANIFESTS_DIR + ":AndroidManifestCamera.xml",
+        ":use_shoe_1stage": MANIFESTS_DIR + ":AndroidManifestSneaker.xml",
+        ":use_chair_1stage": MANIFESTS_DIR + ":AndroidManifestChair.xml",
+    }),
+    deps = [
+        "//third_party:opencv",
+        "@maven//:androidx_concurrent_concurrent_futures",
+        "@maven//:com_google_guava_guava",
+    ],
+)
+
+ASSETS_DIR = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets"
+
+genrule(
+    name = "mesh",
+    srcs = select({
+        "//conditions:default": [ASSETS_DIR + "/sneaker:model.obj.uuu"],
+        ":use_chair": [ASSETS_DIR + "/chair:model.obj.uuu"],
+        ":use_cup": [ASSETS_DIR + "/cup:model.obj.uuu"],
+        ":use_camera": [ASSETS_DIR + "/camera:model.obj.uuu"],
+        ":use_shoe_1stage": [ASSETS_DIR + "/sneaker:model.obj.uuu"],
+        ":use_chair_1stage": [ASSETS_DIR + "/chair:model.obj.uuu"],
+    }),
+    outs = ["model.obj.uuu"],
+    cmd = "cp $< $@",
+)
+
+genrule(
+    name = "texture",
+    srcs = select({
+        "//conditions:default": [ASSETS_DIR + "/sneaker:texture.jpg"],
+        ":use_chair": [ASSETS_DIR + "/chair:texture.jpg"],
+        ":use_cup": [ASSETS_DIR + "/cup:texture.jpg"],
+        ":use_camera": [ASSETS_DIR + "/camera:texture.jpg"],
+        ":use_shoe_1stage": [ASSETS_DIR + "/sneaker:texture.jpg"],
+        ":use_chair_1stage": [ASSETS_DIR + "/chair:texture.jpg"],
+    }),
+    outs = ["texture.jpg"],
+    cmd = "cp $< $@",
+)
+
 android_binary(
     name = "objectdetection3d",
     srcs = glob(["*.java"]),
     assets = [
         ":binary_graph",
         ":model",
-        "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets:box.obj.uuu",
-        "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets:classic_colors.png",
-    ] + select({
-        "//conditions:default": [
-            "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:model.obj.uuu",
-            "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:texture.jpg",
-        ],
-        ":use_chair_model": [
-            "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:model.obj.uuu",
-            "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:texture.jpg",
-        ],
-    }),
+        ":mesh",
+        ":texture",
+        MODELS_DIR + ":object_detection_ssd_mobilenetv2_oidv4_fp16.tflite",
+        MODELS_DIR + ":object_detection_oidv4_labelmap.pbtxt",
+        ASSETS_DIR + ":box.obj.uuu",
+        ASSETS_DIR + ":classic_colors.png",
+    ],
     assets_dir = "",
     manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
-    manifest_values = {
-        "applicationId": "com.google.mediapipe.apps.objectdetection3d",
-        "appName": "Objectron",
-        "mainActivity": ".MainActivity",
-        "cameraFacingFront": "False",
-        "binaryGraphName": "object_detection_3d.binarypb",
-        "inputVideoStreamName": "input_video",
-        "outputVideoStreamName": "output_video",
-        "flipFramesVertically": "True",
-    },
+    manifest_values = select({
+        "//conditions:default": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_shoe", "Shoe Objectron"),
+        ":use_chair": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_chair", "Chair Objectron"),
+        ":use_cup": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_cup", "Cup Objectron"),
+        ":use_camera": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_camera", "Camera Objectron"),
+        ":use_shoe_1stage": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_shoe_1stage", "Single Stage Shoe Objectron"),
+        ":use_chair_1stage": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_chair_1stage", "Single Stage Chair Objectron"),
+    }),
     multidex = "native",
     deps = [
+        ":manifest_lib",
         ":mediapipe_jni_lib",
         "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
         "//mediapipe/framework/formats:landmark_java_proto_lite",

@ -1,4 +1,4 @@
-// Copyright 2019 The MediaPipe Authors.
+// Copyright 2020 The MediaPipe Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.

@ -14,6 +14,9 @@
 
 package com.google.mediapipe.apps.objectdetection3d;
 
+import android.content.pm.ApplicationInfo;
+import android.content.pm.PackageManager;
+import android.content.pm.PackageManager.NameNotFoundException;
 import android.graphics.Bitmap;
 import android.graphics.BitmapFactory;
 import android.os.Bundle;

@ -40,10 +43,25 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
   private Bitmap objTexture = null;
   private Bitmap boxTexture = null;
 
+  // ApplicationInfo for retrieving metadata defined in the manifest.
+  private ApplicationInfo applicationInfo;
+
   @Override
   protected void onCreate(Bundle savedInstanceState) {
     super.onCreate(savedInstanceState);
 
+    try {
+      applicationInfo =
+          getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
+    } catch (NameNotFoundException e) {
+      Log.e(TAG, "Cannot find application info: " + e);
+    }
+
+    String categoryName = applicationInfo.metaData.getString("categoryName");
+    float[] modelScale = parseFloatArrayFromString(
+        applicationInfo.metaData.getString("modelScale"));
+    float[] modelTransform = parseFloatArrayFromString(
+        applicationInfo.metaData.getString("modelTransformation"));
     prepareDemoAssets();
     AndroidPacketCreator packetCreator = processor.getPacketCreator();
     Map<String, Packet> inputSidePackets = new HashMap<>();
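The hunk above keys the activity's behavior off `<meta-data>` entries (`categoryName`, `modelScale`, `modelTransformation`) that the `generate_manifest_values`-selected manifests provide. As written, `applicationInfo.metaData.getString(...)` returns null when an entry is missing, and the parse helper would then throw. A hedged defensive variant is sketched below; the lookup keys match the diff, but the method name and fallback values are hypothetical and not part of the change.

```java
// Hypothetical defensive variant of the meta-data lookup above (same class).
private float[] readModelScaleOrDefault() {
  android.os.Bundle metaData = applicationInfo.metaData;
  if (metaData == null) {
    return new float[] {1.0f, 1.0f, 1.0f}; // assumed identity scale
  }
  // Bundle.getString(key, defaultValue) keeps a null from reaching Float.parseFloat.
  return parseFloatArrayFromString(metaData.getString("modelScale", "1.0,1.0,1.0"));
}
```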
@ -51,6 +69,9 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
     inputSidePackets.put("box_asset_name", packetCreator.createString(BOX_FILE));
     inputSidePackets.put("obj_texture", packetCreator.createRgbaImageFrame(objTexture));
     inputSidePackets.put("box_texture", packetCreator.createRgbaImageFrame(boxTexture));
+    inputSidePackets.put("allowed_labels", packetCreator.createString(categoryName));
+    inputSidePackets.put("model_scale", packetCreator.createFloat32Array(modelScale));
+    inputSidePackets.put("model_transformation", packetCreator.createFloat32Array(modelTransform));
     processor.setInputSidePackets(inputSidePackets);
   }
 
@ -134,4 +155,13 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
       throw new RuntimeException(e);
     }
   }
+
+  private static float[] parseFloatArrayFromString(String string) {
+    String[] elements = string.split(",", -1);
+    float[] array = new float[elements.length];
+    for (int i = 0; i < elements.length; ++i) {
+      array[i] = Float.parseFloat(elements[i]);
+    }
+    return array;
+  }
 }
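For reference, the new `parseFloatArrayFromString` helper expects plain comma-separated floats, presumably the format the manifest meta-data values use. A standalone demo of the parsing behavior follows; the class name and the sample value are made up for illustration.

```java
import java.util.Arrays;

/** Standalone demo of the comma-separated float format; not part of the diff. */
public final class ParseFloatArrayDemo {
  public static void main(String[] args) {
    String spec = "0.25,0.25,0.25"; // hypothetical "modelScale" meta-data value
    String[] elements = spec.split(",", -1);
    float[] array = new float[elements.length];
    for (int i = 0; i < elements.length; ++i) {
      // Float.parseFloat throws NumberFormatException on empty or non-numeric tokens.
      array[i] = Float.parseFloat(elements[i]);
    }
    System.out.println(Arrays.toString(array)); // prints [0.25, 0.25, 0.25]
  }
}
```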