Project import generated by Copybara.

GitOrigin-RevId: f7d09ed033907b893638a8eb4148efa11c0f09a6
MediaPipe Team 2020-11-04 19:02:35 -05:00 committed by chuoling
parent a8d6ce95c4
commit f96eadd6df
250 changed files with 15261 additions and 4620 deletions

View File

@ -7,5 +7,4 @@ include MANIFEST.in
include README.md
include requirements.txt
recursive-include mediapipe/modules *.tflite *.txt
recursive-include mediapipe/graphs *.binarypb
recursive-include mediapipe/modules *.tflite *.txt *.binarypb

View File

@ -35,9 +35,9 @@ Object Detection
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
@ -53,6 +53,19 @@ See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
## MediaPipe in Python
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described in the solution pages below
(a minimal usage sketch follows this list):
* [MediaPipe Face Mesh](../solutions/face_mesh.md#python) and
[colab](https://mediapipe.page.link/face_mesh_py_colab)
* [MediaPipe Hands](../solutions/hands.md#python) and
[colab](https://mediapipe.page.link/hands_py_colab)
* [MediaPipe Pose](../solutions/pose.md#python) and
[colab](https://mediapipe.page.link/pose_py_colab)
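As a quick sanity check after installation, a minimal sketch along the lines
below can be run (the image path is hypothetical; see the solution pages above
for complete, maintained examples):
```python
import cv2
import mediapipe as mp

# Hypothetical input image; replace with a real file path.
image = cv2.imread('/path/to/image.jpg')

# Run MediaPipe Pose on a single image. MediaPipe expects RGB input, while
# OpenCV reads images as BGR, hence the conversion.
pose = mp.solutions.pose.Pose(static_image_mode=True)
results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
print(results.pose_landmarks)
pose.close()
```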
## MediaPipe on the Web
MediaPipe on the Web is an effort to run the same ML solutions built for mobile

View File

@ -364,9 +364,9 @@ http_archive(
)
#Tensorflow repo should always go after the other external dependencies.
# 2020-08-30
_TENSORFLOW_GIT_COMMIT = "57b009e31e59bd1a7ae85ef8c0232ed86c9b71db"
_TENSORFLOW_SHA256= "de7f5f06204e057383028c7e53f3b352cdf85b3a40981b1a770c9a415a792c0e"
# 2020-10-30
_TENSORFLOW_GIT_COMMIT = "84384703c0d8b502e33ff6fd7eefd219dca5ff8e"
_TENSORFLOW_SHA256= "23fb322fc15a20f7a7838d9a31f8b16f60700a494ea654311a0aa8621769df98"
http_archive(
name = "org_tensorflow",
urls = [

View File

@ -93,38 +93,40 @@ for app in ${apps}; do
echo "=== Target: ${target}"
if [[ $install_only == false ]]; then
bazel_flags=("${default_bazel_flags[@]}")
bazel_flags+=(${target})
if [[ $strip == true ]]; then
bazel_flags+=(--linkopt=-s)
fi
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_4
fi
bazel "${bazel_flags[@]}"
cp -f "${bin}" "${apk}"
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_3
fi
fi
if [[ ${app_name} == "objectdetection3d" ]]; then
orig_apk=${apk}
apk="${out_dir}/${target_name}_shoes.apk"
cp -f "${orig_apk}" "${apk}"
apks+=(${apk})
apk="${out_dir}/${target_name}_chairs.apk"
categories=("shoe" "chair" "cup" "camera" "shoe_1stage" "chair_1stage")
for category in ${categories[@]}; do
apk="${out_dir}/${target_name}_${category}.apk"
if [[ $install_only == false ]]; then
bazel_flags_extended=("${bazel_flags[@]}")
if [[ ${category} != "shoe" ]]; then
bazel_flags_extended+=(--define ${category}=true)
fi
echo "bazel ${bazel_flags_extended[@]}"
bazel "${bazel_flags_extended[@]}"
cp -f "${bin}" "${apk}"
fi
apks+=(${apk})
done
else
if [[ $install_only == false ]]; then
bazel_flags+=(--define chair=true)
bazel_flags=("${default_bazel_flags[@]}")
bazel_flags+=(${target})
if [[ $strip == true ]]; then
bazel_flags+=(--linkopt=-s)
fi
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_4
fi
bazel "${bazel_flags[@]}"
cp -f "${bin}" "${apk}"
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_3
fi
fi
apks+=(${apk})
fi
apks+=(${apk})
fi
done

View File

@ -86,9 +86,7 @@ for app in ${apps}; do
cp -f "${bin_dir}/${app}/"*"_cpu" "${out_dir}"
fi
if [[ $build_only == false ]]; then
if [[ ${target_name} == "multi_hand_tracking" ]]; then
graph_name="hand_tracking/multi_hand_tracking"
elif [[ ${target_name} == "object_tracking" ]]; then
if [[ ${target_name} == "object_tracking" ]]; then
graph_name="tracking/object_detection_tracking"
elif [[ ${target_name} == "upper_body_pose_tracking" ]]; then
graph_name="pose_tracking/upper_body_pose_tracking"

View File

@ -135,6 +135,7 @@ each project.
def camerax_version = "1.0.0-beta10"
implementation "androidx.camera:camera-core:$camerax_version"
implementation "androidx.camera:camera-camera2:$camerax_version"
implementation "androidx.camera:camera-lifecycle:$camerax_version"
}
```

View File

@ -427,45 +427,13 @@ Note: This currently works only on Linux, and please first follow
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below in
[Run in python interpreter](#run-in-python-interpreter) and in this
[colab](https://mediapipe.page.link/mp-py-colab).
install mediapipe` on Linux and macOS, as described, for instance, in the
[Python section of MediaPipe Pose](../solutions/pose.md#python) and in this
[colab](https://mediapipe.page.link/pose_py_colab).
### Run in Python interpreter
Using [MediaPipe Pose](../solutions/pose.md) as an example:
```bash
# Activate a Python virtual environment.
$ python3 -m venv mp_env && source mp_env/bin/activate
# Install MediaPipe Python package
(mp_env)$ pip install mediapipe
# Run in Python interpreter
(mp_env)$ python3
>>> import mediapipe as mp
>>> pose_tracker = mp.examples.UpperBodyPoseTracker()
# For image input
>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
# For live camera input
# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.)
>>> pose_tracker.run_live()
# Close the tracker.
>>> pose_tracker.close()
```
Tip: Use command `deactivate` to exit the Python virtual environment.
### Building Python package from source
Follow these steps only if you have local changes and need to build the Python
package from source. Otherwise, we strongly encourage our users to simply run
`pip install mediapipe`, more convenient and much faster.
Follow the steps below only if you have local changes and need to build the
Python package from source. Otherwise, we strongly encourage our users to simply
run `pip install mediapipe`, which is more convenient and much faster.
1. Make sure that Bazel and OpenCV are correctly installed and configured for
MediaPipe. Please see [Installation](./install.md) for how to setup Bazel

View File

@ -12,7 +12,7 @@ nav_order: 1
{:toc}
---
Note: To interoperate with OpenCV, OpenCV 3.x and above are preferred. OpenCV
Note: To interoperate with OpenCV, OpenCV 3.x to 4.1 are preferred. OpenCV
2.x currently works but interoperability support may be deprecated in the
future.

Binary image files changed (previews not shown).

View File

@ -35,9 +35,9 @@ Object Detection
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅
@ -53,6 +53,19 @@ See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
## MediaPipe in Python
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described in:
* [MediaPipe Face Mesh](../solutions/face_mesh.md#python) and
[colab](https://mediapipe.page.link/face_mesh_py_colab)
* [MediaPipe Hands](../solutions/hands.md#python) and
[colab](https://mediapipe.page.link/hands_py_colab)
* [MediaPipe Pose](../solutions/pose.md#python) and
[colab](https://mediapipe.page.link/pose_py_colab)
## MediaPipe on the Web
MediaPipe on the Web is an effort to run the same ML solutions built for mobile

View File

@ -254,6 +254,99 @@ and for iOS modify `kNumFaces` in
Tip: The maximum number of faces to detect/process is set to 1 by default. To
change it, modify the option of `ConstantSidePacketCalculator` in the graph
file.
#### Python
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/face_mesh_py_colab). If you do need to build
the Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).
Activate a Python virtual environment:
```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```
Install MediaPipe Python package:
```bash
(mp_env)$ pip install mediapipe
```
Run the following Python code:
<!-- Do not change the example code below directly. Change the corresponding example in mediapipe/python/solutions/face_mesh.py and copy it over. -->
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh

# For static images:
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    min_detection_confidence=0.5)
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
for idx, file in enumerate(file_list):
  image = cv2.imread(file)
  # Convert the BGR image to RGB before processing.
  results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

  # Print and draw face mesh landmarks on the image.
  if not results.multi_face_landmarks:
    continue
  annotated_image = image.copy()
  for face_landmarks in results.multi_face_landmarks:
    print('face_landmarks:', face_landmarks)
    mp_drawing.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks,
        connections=mp_face_mesh.FACE_CONNECTIONS,
        landmark_drawing_spec=drawing_spec,
        connection_drawing_spec=drawing_spec)
  cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
face_mesh.close()

# For webcam input:
face_mesh = mp_face_mesh.FaceMesh(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
while cap.isOpened():
  success, image = cap.read()
  if not success:
    break

  # Flip the image horizontally for a later selfie-view display, and convert
  # the BGR image to RGB.
  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  # To improve performance, optionally mark the image as not writeable to
  # pass by reference.
  image.flags.writeable = False
  results = face_mesh.process(image)

  # Draw the face mesh annotations on the image.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
      mp_drawing.draw_landmarks(
          image=image,
          landmark_list=face_landmarks,
          connections=mp_face_mesh.FACE_CONNECTIONS,
          landmark_drawing_spec=drawing_spec,
          connection_drawing_spec=drawing_spec)
  cv2.imshow('MediaPipe FaceMesh', image)
  if cv2.waitKey(5) & 0xFF == 27:
    break
face_mesh.close()
cap.release()
```
### Face Effect Example
Face effect example showcases real-time mobile face effect application use case

View File

@ -55,13 +55,21 @@ frame, and only when the landmark model could no longer identify hand presence
is palm detection invoked to relocalize the hand.
The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt),
which internally utilizes a
[palm/hand detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_detection_gpu.pbtxt),
a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt)
and a
[renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/renderer_gpu.pbtxt).
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
that uses a
[hand landmark tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)
from the
[hand landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark),
and renders using a dedicated
[hand renderer subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/hand_renderer_gpu.pbtxt).
The
[hand landmark tracking subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)
internally uses a
[hand landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt)
from the same module and a
[palm detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt)
from the
[palm detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection).
Note: To visualize a graph, copy the graph and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/). For more information on how
@ -146,34 +154,11 @@ to visualize its associated subgraphs, please see
* iOS target:
[`mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/BUILD)
#### With Multi-hand Support
* Graph:
[`mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1Wk6V9EVaz1ks_MInPqqVGvvJD01SGXDc)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu:multihandtrackinggpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD)
* iOS target:
[`mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu/BUILD)
There are two key differences between this graph and that in the
[main example](#main-example) (which handles only one hand):
1. There is a `NormalizedRectVectorHasMinSize` calculator that checks if the
input vector of `NormalizedRect` objects has a minimum size equal to `N`. In
this graph, if the vector contains fewer than `N` objects, the
`MultiHandDetection` subgraph runs. Otherwise, the `GateCalculator` doesn't
send any image packets to the `MultiHandDetection` subgraph. This way, the
main graph is efficient in that it avoids running the costly hand detection
step when there are already `N` hands in the frame.
2. The `MergeCalculator` has been replaced by the `AssociationNormRect`
calculator. This `AssociationNormRect` takes as input a vector of
`NormalizedRect` objects from the `MultiHandDetection` subgraph on the
current frame, and a vector of `NormalizedRect` objects from the
`MultiHandLandmark` subgraph from the previous frame, and performs an
association operation between these objects. This calculator ensures that
the output vector doesn't contain overlapping regions based on the specified
`min_similarity_threshold`.
Tip: Maximum number of hands to detect/process is set to 2 by default. To change
it, for Android modify `NUM_HANDS` in
[MainActivity.java](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/handtrackinggpu/MainActivity.java),
and for iOS modify `kNumHands` in
[HandTrackingViewController.mm](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/handtrackinggpu/HandTrackingViewController.mm).
#### Palm/Hand Detection Only (no landmarks)
@ -187,8 +172,6 @@ There are two key differences between this graph and that in the
### Desktop
#### Main Example
* Running on CPU
* Graph:
[`mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt)
@ -196,22 +179,101 @@ There are two key differences between this graph and that in the
[`mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking/BUILD)
* Running on GPU
* Graph:
[`mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt)
[`mediapipe/graphs/hand_tracking/hand_tracking_desktop_live_gpu.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/hand_tracking_desktop_live_gpu.pbtxt)
* Target:
[`mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/hand_tracking/BUILD)
#### With Multi-hand Support
Tip: The maximum number of hands to detect/process is set to 2 by default. To
change it, modify the option of `ConstantSidePacketCalculator` in the graph
file.
* Running on CPU
* Graph:
[`mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live)
* Target:
[`mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking/BUILD)
* Running on GPU
* Graph:
[`mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
* Target:
[`mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/multi_hand_tracking/BUILD)
### Python
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/hands_py_colab). If you do need to build the
Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).
Activate a Python virtual environment:
```bash
$ python3 -m venv mp_env && source mp_env/bin/activate
```
Install MediaPipe Python package:
```bash
(mp_env)$ pip install mediapipe
```
Run the following Python code:
<!-- Do not change the example code below directly. Change the corresponding example in mediapipe/python/solutions/hands.py and copy it over. -->
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# For static images:
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=2,
    min_detection_confidence=0.7)
for idx, file in enumerate(file_list):
  # Read an image, flip it around y-axis for correct handedness output (see
  # above).
  image = cv2.flip(cv2.imread(file), 1)
  # Convert the BGR image to RGB before processing.
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

  # Print handedness and draw hand landmarks on the image.
  print('handedness:', results.multi_handedness)
  if not results.multi_hand_landmarks:
    continue
  annotated_image = image.copy()
  for hand_landmarks in results.multi_hand_landmarks:
    print('hand_landmarks:', hand_landmarks)
    mp_drawing.draw_landmarks(
        annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
  cv2.imwrite(
      '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
hands.close()

# For webcam input:
hands = mp_hands.Hands(
    min_detection_confidence=0.7, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)
while cap.isOpened():
  success, image = cap.read()
  if not success:
    break

  # Flip the image horizontally for a later selfie-view display, and convert
  # the BGR image to RGB.
  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  # To improve performance, optionally mark the image as not writeable to
  # pass by reference.
  image.flags.writeable = False
  results = hands.process(image)

  # Draw the hand annotations on the image.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  if results.multi_hand_landmarks:
    for hand_landmarks in results.multi_hand_landmarks:
      mp_drawing.draw_landmarks(
          image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
  cv2.imshow('MediaPipe Hands', image)
  if cv2.waitKey(5) & 0xFF == 27:
    break
hands.close()
cap.release()
```
Tip: Use command `deactivate` to exit the Python virtual environment.
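The `multi_handedness` field printed in the example above is a list of
classification results, one entry per detected hand. A small sketch for reading
the predicted label and score, assuming the proto layout shown by the print
output, could look like this:
```python
def print_handedness(results):
  """Prints the predicted handedness for each detected hand.

  `results` is the object returned by `hands.process(...)` in the example
  above; each `multi_handedness` entry holds a list of classifications whose
  top item carries the 'Left'/'Right' label and a confidence score.
  """
  if not results.multi_handedness:
    return
  for handedness in results.multi_handedness:
    top = handedness.classification[0]
    print('Hand:', top.label, 'score:', top.score)
```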
### Web

View File

@ -37,10 +37,10 @@ nav_order: 30
### [Hands](https://google.github.io/mediapipe/solutions/hands)
* Palm detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite),
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite),
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc)
@ -68,6 +68,11 @@ nav_order: 30
* [TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_sneakers.tflite)
* [TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_chair.tflite)
* [TFLite model for cameras](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_camera.tflite)
* [TFLite model for cups](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_cup.tflite)
* [Single-stage TFLite model for shoes](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_sneakers_1stage.tflite)
* [Single-stage TFLite model for chairs](https://github.com/google/mediapipe/tree/master/mediapipe/models/object_detection_3d_chair_1stage.tflite)
* [Model card](https://mediapipe.page.link/objectron-mc)
### [KNIFT](https://google.github.io/mediapipe/solutions/knift)

View File

@ -15,13 +15,12 @@ nav_order: 10
## Overview
MediaPipe Objectron is a mobile real-time 3D object detection solution for
everyday objects. It detects objects in 2D images, and estimates their poses and
sizes through a machine learning (ML) model, trained on a newly created 3D
dataset.
everyday objects. It detects objects in 2D images, and estimates their poses
through a machine learning (ML) model, trained on a newly created 3D dataset.
![objectron_shoe_android_gpu.gif](../images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu.gif](../images/mobile/objectron_chair_android_gpu.gif)
:--------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------:
*Fig 1(a). Objectron for Shoes.* | *Fig 1(b). Objectron for Chairs.*
![objectron_shoe_android_gpu.gif](../images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu.gif](../images/mobile/objectron_chair_android_gpu.gif) | ![objectron_camera_android_gpu.gif](../images/mobile/objectron_camera_android_gpu.gif) | ![objectron_cup_android_gpu.gif](../images/mobile/objectron_cup_android_gpu.gif)
:--------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------:
*Fig 1(a). Objectron for Shoes.* | *Fig 1(b). Objectron for Chairs.* | *Fig 1(c). Objectron for Cameras.* | *Fig 1(d). Objectron for Cups.*
Object detection is an extensively studied computer vision problem, but most of
the research has focused on
@ -85,15 +84,41 @@ able to increase the accuracy by about 10%.
:-------------------------------------------------------------------------------------------: |
*Fig 4. An example of AR synthetic data generation. The virtual white-brown cereal box is rendered into the real scene, next to the real blue book.* |
## ML Model for 3D Object Detection
## ML Pipelines for 3D Object Detection
We built two ML pipelines to predict the 3D bounding box of an object from a
single RGB image: one is a two-stage pipeline and the other is a single-stage
pipeline. The two-stage pipeline is 3x faster than the single-stage pipeline
with similar or better accuracy. The single stage pipeline is good at detecting
multiple objects, whereas the two stage pipeline is good for a single dominant
object.
### Two-stage Pipeline
Our two-stage pipeline is illustrated by the diagram in Fig 5. The first stage
uses an object detector to find the 2D crop of the object. The second stage
takes the image crop and estimates the 3D bounding box. At the same time, it
also computes the 2D crop of the object for the next frame, such that the object
detector does not need to run every frame.
![objectron_network_architecture.png](../images/objectron_2stage_network_architecture.png) |
:----------------------------------------------------------------------------------------: |
*Fig 5. Network architecture and post-processing for two-stage 3D object detection.* |
We can use any 2D object detector for the first stage. In this solution, we use
[TensorFlow Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection).
The second-stage 3D bounding box predictor we released runs at 83 FPS on an
Adreno 650 mobile GPU.
### Single-stage Pipeline
![objectron_network_architecture.png](../images/objectron_network_architecture.png) |
:---------------------------------------------------------------------------------: |
*Fig 5. Network architecture and post-processing for 3D object detection.* |
*Fig 6. Network architecture and post-processing for single-stage 3D object detection.* |
We [built a single-stage model](https://arxiv.org/abs/2003.03522) to predict the
pose and physical size of an object from a single RGB image. The model backbone
has an encoder-decoder architecture, built upon
Our [single-stage pipeline](https://arxiv.org/abs/2003.03522) is illustrated by
the diagram in Fig 6. The model backbone has an encoder-decoder architecture,
built upon
[MobileNetv2](https://ai.googleblog.com/2018/04/mobilenetv2-next-generation-of-on.html).
We employ a multi-task learning approach, jointly predicting an object's shape
with detection and regression. The shape task predicts the object's shape
@ -114,9 +139,9 @@ size of the object. The model is light enough to run real-time on mobile devices
![objectron_sample_network_results.png](../images/objectron_sample_network_results.png) |
:-------------------------------------------------------------------------------------: |
*Fig 6. Sample results of our network — (Left) original 2D image with estimated bounding boxes, (Middle) object detection by Gaussian distribution, (Right) predicted segmentation mask.* |
*Fig 7. Sample results of our network — (Left) original 2D image with estimated bounding boxes, (Middle) object detection by Gaussian distribution, (Right) predicted segmentation mask.* |
## Detection and Tracking Pipeline
#### Detection and Tracking
When the model is applied to every frame captured by the mobile device, it can
suffer from jitter due to the ambiguity of the 3D bounding box estimated in each
@ -130,7 +155,7 @@ temporally consistent, reducing the jitter.
The Objectron 3D object detection and tracking pipeline is implemented as a
MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt),
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt),
which internally uses a
[detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/subgraphs/objectron_detection_gpu.pbtxt)
and a
@ -147,6 +172,12 @@ new detection becomes available from the detection subgraph, the tracking
subgraph is also responsible for consolidation between the detection and
tracking results, based on the area of overlap.
## Objectron Dataset
We also released our [Objectron dataset](http://objectron.dev), with which we
trained our 3D object detection models. The technical details of the Objectron
dataset, including usage and tutorials, are available on the dataset website.
## Example Apps
Please first see general instructions for
@ -158,32 +189,72 @@ Note: To visualize a graph, copy the graph and paste it into
to visualize its associated subgraphs, please see
[visualizer documentation](../tools/visualizer.md).
### Objectron for Shoes
### Two-stage Objectron
* Graph:
[`mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1S0K4hbWt3o31FfQ4QU3Rz7IHrvOUMx1d)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
* iOS target: Not available
[`mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking.pbtxt)
### Objectron for Chairs
* Graph:
[`mediapipe/graphs/object_detection_3d/chair_classic_occlusion_tracking.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/chair_classic_occlusion_tracking.pbtxt)
* Android target:
[(or download prebuilt ARM64 APK)](https://drive.google.com/open?id=1MM8K-13bXLCVS1EHQ-KgkVyEahEPrKej)
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD)
and add `--define chair=true` to the build command, i.e.,
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD).
Build for **shoes** (default) with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1ANW9WDOCb8QO1r8gDC03A4UgrPkICdPP/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build for **chairs** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1lcUv1TBnv_SxnKSQwdOqbdLa9mkaTJHy/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define chair=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build for **cups** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1bf77KDkowwrduleiC9B1M1XnEhjnOQbX/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define cup=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build for **cameras** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1GM7lPO-s5URVxIzQur1bLsionEJs3yIl/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define camera=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
* iOS target: Not available
### Single-stage Objectron
* Graph:
[`mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt`](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt)
* Android target:
[`mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/BUILD).
Build with **single-stage** model for **shoes** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1MvaEg4dkvKN8jAU1Z2GtudyXi1rQHYsE/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define shoe_1stage=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Build with **single-stage** model for **chairs** with:
[(or download prebuilt ARM64 APK)](https://drive.google.com/file/d/1GJL4z3jr-wD1jMHGd4NBfOG-Yoq5t167/view?usp=sharing)
```bash
bazel build -c opt --config android_arm64 --define chair_1stage=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
* iOS target: Not available
## Resources
* Google AI Blog:
[Announcing the Objectron Dataset](https://mediapipe.page.link/objectron_dataset_ai_blog)
* Google AI Blog:
[Real-Time 3D Object Detection on Mobile Devices with MediaPipe](https://ai.googleblog.com/2020/03/real-time-3d-object-detection-on-mobile.html)
* Paper: [MobilePose: Real-Time Pose Estimation for Unseen Objects with Weak

View File

@ -5,7 +5,7 @@ parent: Solutions
nav_order: 5
---
# MediaPipe BlazePose
# MediaPipe Pose
{: .no_toc }
1. TOC
@ -88,12 +88,11 @@ hip midpoints.
### Pose Landmark Model (BlazePose Tracker)
The landmark model currently included in MediaPipe Pose predicts the location of
25 upper-body landmarks (see figure below), each with `(x, y, z, visibility)`,
plus two virtual alignment keypoints. Note that the `z` value should be
discarded as the model is currently not fully trained to predict depth, but this
is something we have on the roadmap. The model shares the same architecture as
the full-body version that predicts 33 landmarks, described in more detail in
the
25 upper-body landmarks (see figure below), each with `(x, y, z, visibility)`.
Note that the `z` value should be discarded as the model is currently not fully
trained to predict depth, but this is something we have on the roadmap. The
model shares the same architecture as the full-body version that predicts 33
landmarks, described in more detail in the
[BlazePose Google AI Blog](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
and in this [paper](https://arxiv.org/abs/2006.10204).
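For illustration, a minimal sketch of reading those landmark fields from the
Python API is shown below (the image path is hypothetical); `z` is simply
ignored here, per the note above:
```python
import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose

# Hypothetical input image; replace with a real file path.
image = cv2.imread('/path/to/image.jpg')

pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)
results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
if results.pose_landmarks:
  for landmark in results.pose_landmarks.landmark:
    # x and y are normalized to [0.0, 1.0] by image width and height;
    # z is discarded here, as recommended above.
    print('x:', landmark.x, 'y:', landmark.y,
          'visibility:', landmark.visibility)
pose.close()
```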
@ -147,35 +146,77 @@ MediaPipe examples.
MediaPipe Python package is available on
[PyPI](https://pypi.org/project/mediapipe/), and can be installed simply by `pip
install mediapipe` on Linux and macOS, as described below and in this
[colab](https://mediapipe.page.link/mp-py-colab). If you do need to build the
[colab](https://mediapipe.page.link/pose_py_colab). If you do need to build the
Python package from source, see
[additional instructions](../getting_started/building_examples.md#python).
Activate a Python virtual environment:
```bash
# Activate a Python virtual environment.
$ python3 -m venv mp_env && source mp_env/bin/activate
```
# Install MediaPipe Python package
Install MediaPipe Python package:
```bash
(mp_env)$ pip install mediapipe
```
# Run in Python interpreter
(mp_env)$ python3
>>> import mediapipe as mp
>>> pose_tracker = mp.examples.UpperBodyPoseTracker()
Run the following Python code:
# For image input
>>> pose_landmarks, _ = pose_tracker.run(input_file='/path/to/input/file', output_file='/path/to/output/file')
>>> pose_landmarks, annotated_image = pose_tracker.run(input_file='/path/to/file')
# To print out the pose landmarks, you can simply do "print(pose_landmarks)".
# However, the data points can be more accessible with the following approach.
>>> [print('x is', data_point.x, 'y is', data_point.y, 'z is', data_point.z, 'visibility is', data_point.visibility) for data_point in pose_landmarks.landmark]
<!-- Do not change the example code below directly. Change the corresponding example in mediapipe/python/solutions/pose.py and copy it over. -->
# For live camera input
# (Press Esc within the output image window to stop the run or let it self terminate after 30 seconds.)
>>> pose_tracker.run_live()
```python
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# For static images:
pose = mp_pose.Pose(
    static_image_mode=True, min_detection_confidence=0.5)
for idx, file in enumerate(file_list):
  image = cv2.imread(file)
  # Convert the BGR image to RGB before processing.
  results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if not results.pose_landmarks:
    continue

  # Print and draw pose landmarks on the image.
  print(
      'nose landmark:',
      results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE])
  annotated_image = image.copy()
  mp_drawing.draw_landmarks(
      annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
  cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
pose.close()

# For webcam input:
pose = mp_pose.Pose(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)
while cap.isOpened():
  success, image = cap.read()
  if not success:
    break

  # Flip the image horizontally for a later selfie-view display, and convert
  # the BGR image to RGB.
  image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
  # To improve performance, optionally mark the image as not writeable to
  # pass by reference.
  image.flags.writeable = False
  results = pose.process(image)

  # Draw the pose annotation on the image.
  image.flags.writeable = True
  image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  mp_drawing.draw_landmarks(
      image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
  cv2.imshow('MediaPipe Pose', image)
  if cv2.waitKey(5) & 0xFF == 27:
    break
pose.close()
cap.release()
```
Tip: Use command `deactivate` to exit the Python virtual environment.

View File

@ -19,9 +19,9 @@ has_toc: false
[]() | Android | iOS | Desktop | Python | Web | Coral
:---------------------------------------------------------------------------------------- | :-----: | :-: | :-----: | :----: | :-: | :---:
[Face Detection](https://google.github.io/mediapipe/solutions/face_detection) | ✅ | ✅ | ✅ | | ✅ | ✅
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh) | ✅ | ✅ | ✅ | | |
[Iris](https://google.github.io/mediapipe/solutions/iris) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Hands](https://google.github.io/mediapipe/solutions/hands) | ✅ | ✅ | ✅ | | ✅ |
[Pose](https://google.github.io/mediapipe/solutions/pose) | ✅ | ✅ | ✅ | ✅ | ✅ |
[Hair Segmentation](https://google.github.io/mediapipe/solutions/hair_segmentation) | ✅ | | ✅ | | ✅ |
[Object Detection](https://google.github.io/mediapipe/solutions/object_detection) | ✅ | ✅ | ✅ | | | ✅

View File

@ -15,7 +15,6 @@
"mediapipe/examples/ios/handdetectiongpu/BUILD",
"mediapipe/examples/ios/handtrackinggpu/BUILD",
"mediapipe/examples/ios/iristrackinggpu/BUILD",
"mediapipe/examples/ios/multihandtrackinggpu/BUILD",
"mediapipe/examples/ios/objectdetectioncpu/BUILD",
"mediapipe/examples/ios/objectdetectiongpu/BUILD",
"mediapipe/examples/ios/upperbodyposetrackinggpu/BUILD"
@ -29,7 +28,6 @@
"//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp",
"//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp",
"//mediapipe/examples/ios/iristrackinggpu:IrisTrackingGpuApp",
"//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp",
"//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp",
"//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp",
"//mediapipe/examples/ios/upperbodyposetrackinggpu:UpperBodyPoseTrackingGpuApp",
@ -97,7 +95,6 @@
"mediapipe/examples/ios/handdetectiongpu",
"mediapipe/examples/ios/handtrackinggpu",
"mediapipe/examples/ios/iristrackinggpu",
"mediapipe/examples/ios/multihandtrackinggpu",
"mediapipe/examples/ios/objectdetectioncpu",
"mediapipe/examples/ios/objectdetectiongpu",
"mediapipe/examples/ios/upperbodyposetrackinggpu",

View File

@ -18,7 +18,6 @@
"mediapipe/examples/ios/handdetectiongpu",
"mediapipe/examples/ios/handtrackinggpu",
"mediapipe/examples/ios/iristrackinggpu",
"mediapipe/examples/ios/multihandtrackinggpu",
"mediapipe/examples/ios/objectdetectioncpu",
"mediapipe/examples/ios/objectdetectiongpu",
"mediapipe/examples/ios/upperbodyposetrackinggpu"

View File

@ -116,6 +116,7 @@ mediapipe_proto_library(
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
"//mediapipe/framework/formats:classification_proto",
],
)
@ -240,6 +241,7 @@ cc_library(
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
@ -800,14 +802,23 @@ cc_library(
name = "split_vector_calculator",
srcs = ["split_vector_calculator.cc"],
hdrs = ["split_vector_calculator.h"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":split_vector_calculator_cc_proto",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util:resource_util",
@ -1069,6 +1080,7 @@ cc_library(
":constant_side_packet_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:collection_item_id",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message ClipVectorSizeCalculatorOptions {
extend CalculatorOptions {
optional ClipVectorSizeCalculatorOptions ext = 274674998;

View File

@ -18,6 +18,7 @@
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/integral_types.h"
#include "tensorflow/lite/interpreter.h"
@ -64,6 +65,9 @@ typedef ConcatenateVectorCalculator<TfLiteTensor>
ConcatenateTfLiteTensorVectorCalculator;
REGISTER_CALCULATOR(ConcatenateTfLiteTensorVectorCalculator);
typedef ConcatenateVectorCalculator<Tensor> ConcatenateTensorVectorCalculator;
REGISTER_CALCULATOR(ConcatenateTensorVectorCalculator);
typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
ConcatenateLandmarkVectorCalculator;
REGISTER_CALCULATOR(ConcatenateLandmarkVectorCalculator);

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message ConcatenateVectorCalculatorOptions {
extend CalculatorOptions {
optional ConcatenateVectorCalculatorOptions ext = 259397839;

View File

@ -17,6 +17,7 @@
#include "mediapipe/calculators/core/constant_side_packet_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/ret_check.h"
@ -24,6 +25,8 @@
namespace mediapipe {
namespace {} // namespace
// Generates an output side packet or multiple output side packets according to
// the specified options.
//
@ -74,6 +77,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
packet.Set<std::string>();
} else if (packet_options.has_uint64_value()) {
packet.Set<uint64>();
} else if (packet_options.has_classification_list_value()) {
packet.Set<ClassificationList>();
} else {
return ::mediapipe::InvalidArgumentError(
"None of supported values were specified in options.");
@ -100,6 +105,9 @@ class ConstantSidePacketCalculator : public CalculatorBase {
packet.Set(MakePacket<std::string>(packet_options.string_value()));
} else if (packet_options.has_uint64_value()) {
packet.Set(MakePacket<uint64>(packet_options.uint64_value()));
} else if (packet_options.has_classification_list_value()) {
packet.Set(MakePacket<ClassificationList>(
packet_options.classification_list_value()));
} else {
return ::mediapipe::InvalidArgumentError(
"None of supported values were specified in options.");

View File

@ -17,6 +17,9 @@ syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/framework/formats/classification.proto";
option objc_class_prefix = "MediaPipe";
message ConstantSidePacketCalculatorOptions {
extend CalculatorOptions {
@ -30,6 +33,7 @@ message ConstantSidePacketCalculatorOptions {
bool bool_value = 3;
string string_value = 4;
uint64 uint64_value = 5;
ClassificationList classification_list_value = 6;
}
}

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message DequantizeByteArrayCalculatorOptions {
extend CalculatorOptions {
optional DequantizeByteArrayCalculatorOptions ext = 272316343;

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message GateCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional GateCalculatorOptions ext = 261754847;

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message PacketClonerCalculatorOptions {
extend CalculatorOptions {
optional PacketClonerCalculatorOptions ext = 258872085;

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message PacketResamplerCalculatorOptions {
extend CalculatorOptions {
optional PacketResamplerCalculatorOptions ext = 95743844;

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message PacketThinnerCalculatorOptions {
extend CalculatorOptions {
optional PacketThinnerCalculatorOptions ext = 288533508;

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message QuantizeFloatVectorCalculatorOptions {
extend CalculatorOptions {
optional QuantizeFloatVectorCalculatorOptions ext = 259848061;

View File

@ -32,6 +32,9 @@ class SequenceShiftCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Index(0).SetAny();
if (cc->InputSidePackets().HasTag(kPacketOffsetTag)) {
cc->InputSidePackets().Tag(kPacketOffsetTag).Set<int>();
}
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
return ::mediapipe::OkStatus();
}
@ -41,6 +44,8 @@ class SequenceShiftCalculator : public CalculatorBase {
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
static constexpr const char* kPacketOffsetTag = "PACKET_OFFSET";
// A positive offset means we want a packet to be output with the timestamp of
// a later packet. Stores packets waiting for their output timestamps and
// outputs a single packet when the cache fills.
@ -70,6 +75,9 @@ REGISTER_CALCULATOR(SequenceShiftCalculator);
::mediapipe::Status SequenceShiftCalculator::Open(CalculatorContext* cc) {
packet_offset_ =
cc->Options<mediapipe::SequenceShiftCalculatorOptions>().packet_offset();
if (cc->InputSidePackets().HasTag(kPacketOffsetTag)) {
packet_offset_ = cc->InputSidePackets().Tag(kPacketOffsetTag).Get<int>();
}
cache_size_ = abs(packet_offset_);
// An offset of zero is a no-op, but someone might still request it.
if (packet_offset_ == 0) {

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
message SequenceShiftCalculatorOptions {
extend CalculatorOptions {
optional SequenceShiftCalculatorOptions ext = 107633927;

View File

@ -99,6 +99,35 @@ TEST(SequenceShiftCalculatorTest, NegativeShift) {
}
}
// Tests using a side packet to specify the offset. Shifting by -2, i.e.,
// output input[i] with timestamp[i - 2]. The first two packets should be
// dropped.
TEST(SequenceShiftCalculatorTest, SidePacketOffset) {
CalculatorGraphConfig::Node node;
node.set_calculator("SequenceShiftCalculator");
node.add_input_stream("input");
node.add_output_stream("output");
node.add_input_side_packet("PACKET_OFFSET:packet_offset");
CalculatorRunner runner(node);
AddPackets(&runner);
runner.MutableSidePackets()->Tag("PACKET_OFFSET") = Adopt(new int(-2));
MP_ASSERT_OK(runner.Run());
const std::vector<Packet>& input_packets =
runner.MutableInputs()->Index(0).packets;
const std::vector<Packet>& output_packets = runner.Outputs().Index(0).packets;
ASSERT_EQ(10, input_packets.size());
// Input packet[i] should be output with the timestamp of input packet[i - 2].
// The first two packets are dropped. This means timestamps match between
// input and output packets, but the data in the output packets come from
// input_packets[i + 2].
ASSERT_EQ(8, output_packets.size());
for (int i = 0; i < output_packets.size(); ++i) {
EXPECT_EQ(input_packets[i].Timestamp(), output_packets[i].Timestamp());
EXPECT_EQ(input_packets[i + 2].Get<int>(), output_packets[i].Get<int>());
}
}
} // namespace
} // namespace mediapipe

View File

@ -16,10 +16,12 @@
#include <vector>
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "tensorflow/lite/interpreter.h"
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
@ -46,15 +48,18 @@ typedef SplitVectorCalculator<TfLiteTensor, false>
SplitTfLiteTensorVectorCalculator;
REGISTER_CALCULATOR(SplitTfLiteTensorVectorCalculator);
typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark, false>
typedef SplitVectorCalculator<Tensor, true> SplitTensorVectorCalculator;
REGISTER_CALCULATOR(SplitTensorVectorCalculator);
typedef SplitVectorCalculator<mediapipe::NormalizedLandmark, false>
SplitLandmarkVectorCalculator;
REGISTER_CALCULATOR(SplitLandmarkVectorCalculator);
typedef SplitVectorCalculator<::mediapipe::NormalizedLandmarkList, false>
typedef SplitVectorCalculator<mediapipe::NormalizedLandmarkList, false>
SplitNormalizedLandmarkListVectorCalculator;
REGISTER_CALCULATOR(SplitNormalizedLandmarkListVectorCalculator);
typedef SplitVectorCalculator<::mediapipe::NormalizedRect, false>
typedef SplitVectorCalculator<mediapipe::NormalizedRect, false>
SplitNormalizedRectVectorCalculator;
REGISTER_CALCULATOR(SplitNormalizedRectVectorCalculator);
@ -67,8 +72,12 @@ typedef SplitVectorCalculator<::tflite::gpu::gl::GlBuffer, true>
REGISTER_CALCULATOR(MovableSplitGlBufferVectorCalculator);
#endif
typedef SplitVectorCalculator<::mediapipe::Detection, false>
typedef SplitVectorCalculator<mediapipe::Detection, false>
SplitDetectionVectorCalculator;
REGISTER_CALCULATOR(SplitDetectionVectorCalculator);
typedef SplitVectorCalculator<mediapipe::ClassificationList, false>
SplitClassificationListVectorCalculator;
REGISTER_CALCULATOR(SplitClassificationListVectorCalculator);
} // namespace mediapipe

View File

@ -18,6 +18,8 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
option objc_class_prefix = "MediaPipe";
// A Range {begin, end} specifies beginning and ending indices to splice a
// vector. A vector v is spliced to have elements v[begin:(end-1)], i.e., with
// begin index inclusive and end index exclusive.

View File

@ -107,7 +107,7 @@ class BilateralFilterCalculator : public CalculatorBase {
GLuint program_ = 0;
GLuint vao_;
GLuint vbo_[2]; // vertex storage
#endif // !MEDIAPIPE_DISABLE_GPU
#endif // !MEDIAPIPE_DISABLE_GPU
};
REGISTER_CALCULATOR(BilateralFilterCalculator);

View File

@ -519,7 +519,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
renderer = yuv_renderer_.get();
src1 = gpu_helper_.CreateSourceTexture(input, 0);
} else // NOLINT(readability/braces)
#endif // iOS
#endif // iOS
{
src1 = gpu_helper_.CreateSourceTexture(input);
#if defined(TEXTURE_EXTERNAL_OES)
@ -531,7 +531,7 @@ REGISTER_CALCULATOR(ImageTransformationCalculator);
}
renderer = ext_rgb_renderer_.get();
} else // NOLINT(readability/braces)
#endif // TEXTURE_EXTERNAL_OES
#endif // TEXTURE_EXTERNAL_OES
{
if (!rgb_renderer_) {
rgb_renderer_ = absl::make_unique<QuadRenderer>();

View File

@ -0,0 +1,631 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
load("@bazel_skylib//lib:selects.bzl", "selects")
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
selects.config_setting_group(
name = "compute_shader_unavailable",
match_any = [
"//mediapipe/gpu:disable_gpu",
],
)
mediapipe_proto_library(
name = "inference_calculator_proto",
srcs = ["inference_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "inference_calculator",
srcs = ["inference_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
features = ["-layering_check"], # allow depending on inference_calculator_gpu_deps
linkopts = select({
"//mediapipe:apple": [
"-framework CoreVideo",
"-framework MetalKit",
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":inference_calculator_cc_proto",
"@com_google_absl//absl/memory",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:tensor",
"//mediapipe/util:resource_util",
"//mediapipe/util/tflite:config",
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
"//mediapipe/framework/port:ret_check",
] + select({
":compute_shader_unavailable": [],
"//conditions:default": [":inference_calculator_gpu_deps"],
}) + select({
"//conditions:default": [],
"//mediapipe:android": [
"//mediapipe/util/android/file/base",
"@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate",
],
}) + select({
"//conditions:default": [
"//mediapipe/util:cpu_util",
],
}),
alwayslink = 1,
)
cc_library(
name = "inference_calculator_gpu_deps",
deps = selects.with_or({
"//mediapipe:ios": [
"//mediapipe/gpu:MPPMetalHelper",
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/objc:mediapipe_framework_ios",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
"@org_tensorflow//tensorflow/lite/delegates/gpu/metal:buffer_convert",
"@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate_internal",
],
"//mediapipe:macos": [],
"//conditions:default": [
"//mediapipe/util/tflite:tflite_gpu_runner",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
],
}),
)
mediapipe_proto_library(
name = "tensor_converter_calculator_proto",
srcs = ["tensor_converter_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "tensor_converter_calculator",
srcs = ["tensor_converter_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
features = ["-layering_check"], # allow depending on tensor_converter_calculator_gpu_deps
linkopts = select({
"//mediapipe:apple": [
"-framework CoreVideo",
"-framework MetalKit",
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":tensor_converter_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework:port",
"//mediapipe/util:resource_util",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": ["tensor_converter_calculator_gpu_deps"],
}),
alwayslink = 1,
)
cc_library(
name = "tensor_converter_calculator_gpu_deps",
deps = select({
"//mediapipe:android": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
],
"//mediapipe:ios": [
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:MPPMetalHelper",
"//mediapipe/objc:mediapipe_framework_ios",
],
"//mediapipe:macos": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:shader_util",
"//mediapipe/gpu:gpu_buffer",
],
}),
)
cc_test(
name = "tensor_converter_calculator_test",
srcs = ["tensor_converter_calculator_test.cc"],
deps = [
":tensor_converter_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/tool:validate_type",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
)
mediapipe_proto_library(
name = "tensors_to_detections_calculator_proto",
srcs = ["tensors_to_detections_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "tensors_to_detections_calculator",
srcs = ["tensors_to_detections_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
features = ["-layering_check"], # allow depending on tensors_to_detections_calculator_gpu_deps
linkopts = select({
"//mediapipe:apple": [
"-framework CoreVideo",
"-framework MetalKit",
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":tensors_to_detections_calculator_cc_proto",
"//mediapipe/framework/formats:detection_cc_proto",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:port",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:location",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/formats/object_detection:anchor_cc_proto",
"//mediapipe/framework/port:ret_check",
] + select({
":compute_shader_unavailable": [],
"//conditions:default": [":tensors_to_detections_calculator_gpu_deps"],
}),
alwayslink = 1,
)
cc_library(
name = "tensors_to_detections_calculator_gpu_deps",
deps = select({
"//mediapipe:ios": [
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:MPPMetalHelper",
],
"//mediapipe:macos": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
],
}),
)
mediapipe_proto_library(
name = "tensors_to_landmarks_calculator_proto",
srcs = ["tensors_to_landmarks_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "tensors_to_landmarks_calculator",
srcs = ["tensors_to_landmarks_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":tensors_to_landmarks_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
],
alwayslink = 1,
)
cc_library(
name = "tensors_to_floats_calculator",
srcs = ["tensors_to_floats_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
],
alwayslink = 1,
)
cc_library(
name = "tensors_to_classification_calculator",
srcs = ["tensors_to_classification_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = [
":tensors_to_classification_calculator_cc_proto",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:location",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/formats:tensor",
"//mediapipe/util:resource_util",
] + select({
"//mediapipe:android": [
"//mediapipe/util/android/file/base",
],
"//mediapipe:ios": [
"//mediapipe/util/android/file/base",
],
"//conditions:default": [
"//mediapipe/framework/port:file_helpers",
],
}),
alwayslink = 1,
)
mediapipe_proto_library(
name = "tensors_to_classification_calculator_proto",
srcs = ["tensors_to_classification_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_test(
name = "tensors_to_classification_calculator_test",
srcs = ["tensors_to_classification_calculator_test.cc"],
data = ["testdata/labelmap.txt"],
deps = [
":tensors_to_classification_calculator",
":tensors_to_classification_calculator_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"@com_google_absl//absl/memory",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "image_to_tensor_calculator",
srcs = ["image_to_tensor_calculator.cc"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
features = ["-layering_check"], # allow depending on image_to_tensor_calculator_gpu_deps
visibility = ["//visibility:public"],
deps = [
":image_to_tensor_calculator_cc_proto",
":image_to_tensor_converter",
":image_to_tensor_converter_opencv",
":image_to_tensor_utils",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:port",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [":image_to_tensor_calculator_gpu_deps"],
}),
alwayslink = 1,
)
cc_library(
name = "image_to_tensor_calculator_gpu_deps",
deps = select({
"//mediapipe:android": [
":image_to_tensor_converter_gl_buffer",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
],
"//mediapipe:apple": [
":image_to_tensor_converter_metal",
"//mediapipe/gpu:MPPMetalHelper",
"//mediapipe/gpu:gpu_buffer",
],
"//conditions:default": [
":image_to_tensor_converter_gl_buffer",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
],
}),
)
mediapipe_proto_library(
name = "image_to_tensor_calculator_proto",
srcs = ["image_to_tensor_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_test(
name = "image_to_tensor_calculator_test",
srcs = ["image_to_tensor_calculator_test.cc"],
data = [
"testdata/image_to_tensor/input.jpg",
"testdata/image_to_tensor/large_sub_rect.png",
"testdata/image_to_tensor/large_sub_rect_keep_aspect.png",
"testdata/image_to_tensor/large_sub_rect_keep_aspect_with_rotation.png",
"testdata/image_to_tensor/medium_sub_rect_keep_aspect.png",
"testdata/image_to_tensor/medium_sub_rect_keep_aspect_with_rotation.png",
"testdata/image_to_tensor/medium_sub_rect_with_rotation.png",
"testdata/image_to_tensor/noop_except_range.png",
],
deps = [
":image_to_tensor_calculator",
":image_to_tensor_utils",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/tool:validate_type",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@org_tensorflow//tensorflow/lite:framework",
],
)
cc_library(
name = "image_to_tensor_converter",
hdrs = ["image_to_tensor_converter.h"],
deps = [
":image_to_tensor_utils",
"//mediapipe/framework:packet",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:statusor",
],
)
cc_library(
name = "image_to_tensor_converter_opencv",
srcs = ["image_to_tensor_converter_opencv.cc"],
hdrs = ["image_to_tensor_converter_opencv.h"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
deps = [
":image_to_tensor_converter",
":image_to_tensor_utils",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
],
)
cc_library(
name = "image_to_tensor_converter_gl_buffer",
srcs = ["image_to_tensor_converter_gl_buffer.cc"],
hdrs = ["image_to_tensor_converter_gl_buffer.h"],
deps = ["//mediapipe/framework:port"] + select({
"//mediapipe:apple": [],
"//conditions:default": [
":image_to_tensor_converter",
":image_to_tensor_utils",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_buffer_format",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:types",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:command_queue",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_call",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:request_gpu_info",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:variable",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util",
],
}),
)
cc_library(
name = "image_to_tensor_converter_gl_texture",
srcs = ["image_to_tensor_converter_gl_texture.cc"],
hdrs = ["image_to_tensor_converter_gl_texture.h"],
deps = ["//mediapipe/framework:port"] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [
":image_to_tensor_converter",
":image_to_tensor_utils",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:shader_util",
],
}),
)
cc_library(
name = "image_to_tensor_converter_metal",
srcs = ["image_to_tensor_converter_metal.cc"],
hdrs = ["image_to_tensor_converter_metal.h"],
copts = select({
"//mediapipe:apple": [
"-x objective-c++",
"-fobjc-arc", # enable reference-counting
],
"//conditions:default": [],
}),
linkopts = select({
"//mediapipe:apple": [
"-framework CoreVideo",
"-framework MetalKit",
],
"//conditions:default": [],
}),
deps = ["//mediapipe/framework:port"] + select({
"//mediapipe:apple": [
":image_to_tensor_converter",
":image_to_tensor_utils",
"//mediapipe/gpu:MPPMetalHelper",
"@com_google_absl//absl/strings",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_buffer_format",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:types",
],
"//conditions:default": [],
}),
)
cc_library(
name = "image_to_tensor_utils",
srcs = ["image_to_tensor_utils.cc"],
hdrs = ["image_to_tensor_utils.h"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:statusor",
"@com_google_absl//absl/types:optional",
],
)
cc_test(
name = "image_to_tensor_utils_test",
srcs = ["image_to_tensor_utils_test.cc"],
deps = [
":image_to_tensor_utils",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:gtest_main",
],
)

View File

@ -0,0 +1,275 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <array>
#include <memory>
#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#if MEDIAPIPE_METAL_ENABLED
#include "mediapipe/calculators/tensor/image_to_tensor_converter_metal.h"
#include "mediapipe/gpu/MPPMetalHelper.h"
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#else
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
namespace {
constexpr char kInputCpu[] = "IMAGE";
constexpr char kInputGpu[] = "IMAGE_GPU";
constexpr char kOutputMatrix[] = "MATRIX";
constexpr char kOutput[] = "TENSORS";
constexpr char kInputNormRect[] = "NORM_RECT";
constexpr char kOutputLetterboxPadding[] = "LETTERBOX_PADDING";
} // namespace
namespace mediapipe {
// Converts image into Tensor, possibly with cropping, resizing and
// normalization, according to specified inputs and options.
//
// Inputs:
// IMAGE - ImageFrame [ImageFormat::SRGB/SRGBA]
// Image to extract from.
// IMAGE_GPU - GpuBuffer [GpuBufferFormat::kBGRA32]
// Image to extract from.
// (Either IMAGE or IMAGE_GPU has to be specified.)
//
// NORM_RECT - NormalizedRect @Optional
// Describes region of image to extract.
// @Optional: rect covering the whole image is used if not specified.
//
// Outputs:
// TENSORS - std::vector<Tensor>
// Vector containing a single Tensor populated with an extracted RGB image.
// MATRIX - std::array<float, 16> @Optional
// An std::array<float, 16> representing a 4x4 row-major-order matrix which
// can be used to map a point on the output tensor to a point on the input
// image.
// LETTERBOX_PADDING - std::array<float, 4> @Optional
// An std::array<float, 4> representing the letterbox padding from the 4
// sides ([left, top, right, bottom]) of the output image, normalized to
// [0.f, 1.f] by the output dimensions. The padding values are non-zero only
// when the "keep_aspect_ratio" is true.
//
// For instance, when the input image is 10x10 (width x height) and the
// output dimensions specified in the calculator option are 20x40 and
// "keep_aspect_ratio" is true, the calculator scales the input image to
// 20x20 and places it in the middle of the output image with an equal
// padding of 10 pixels at the top and the bottom. The resulting array is
// therefore [0.f, 0.25f, 0.f, 0.25f] (10/40 = 0.25f).
//
// Example:
// node {
// calculator: "ImageToTensorCalculator"
// input_stream: "IMAGE:image" # or "IMAGE_GPU:image"
// input_stream: "NORM_RECT:roi"
// output_stream: "TENSORS:tensors"
// output_stream: "MATRIX:matrix"
// options {
// [mediapipe.ImageToTensorCalculatorOptions.ext] {
// output_tensor_width: 256
// output_tensor_height: 256
// keep_aspect_ratio: false
// output_tensor_float_range {
// min: 0.0
// max: 1.0
// }
// # gpu_origin: CONVENTIONAL # or TOP_LEFT
// }
// }
// }
class ImageToTensorCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
const auto& options =
cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
RET_CHECK(options.has_output_tensor_float_range())
<< "Output tensor range is required.";
RET_CHECK_LT(options.output_tensor_float_range().min(),
options.output_tensor_float_range().max())
<< "Valid output tensor range is required.";
RET_CHECK_GT(options.output_tensor_width(), 0)
<< "Valid output tensor width is required.";
RET_CHECK_GT(options.output_tensor_height(), 0)
<< "Valid output tensor height is required.";
if (cc->Inputs().HasTag(kInputNormRect)) {
cc->Inputs().Tag(kInputNormRect).Set<mediapipe::NormalizedRect>();
}
if (cc->Outputs().HasTag(kOutputLetterboxPadding)) {
cc->Outputs().Tag(kOutputLetterboxPadding).Set<std::array<float, 4>>();
}
if (cc->Outputs().HasTag(kOutputMatrix)) {
cc->Outputs().Tag(kOutputMatrix).Set<std::array<float, 16>>();
}
const bool has_cpu_input = cc->Inputs().HasTag(kInputCpu);
const bool has_gpu_input = cc->Inputs().HasTag(kInputGpu);
RET_CHECK_EQ((has_cpu_input ? 1 : 0) + (has_gpu_input ? 1 : 0), 1)
<< "Either CPU or GPU input is expected, not both.";
if (has_cpu_input) {
cc->Inputs().Tag(kInputCpu).Set<mediapipe::ImageFrame>();
} else if (has_gpu_input) {
#if MEDIAPIPE_DISABLE_GPU
return mediapipe::UnimplementedError("GPU processing is disabled");
#else
#if MEDIAPIPE_METAL_ENABLED
MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#else
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif // MEDIAPIPE_METAL_ENABLED
cc->Inputs().Tag(kInputGpu).Set<mediapipe::GpuBuffer>();
#endif // MEDIAPIPE_DISABLE_GPU
}
cc->Outputs().Tag(kOutput).Set<std::vector<Tensor>>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) {
// Makes sure outputs' next timestamp bound update is handled automatically
// by the framework.
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
output_width_ = options_.output_tensor_width();
output_height_ = options_.output_tensor_height();
range_min_ = options_.output_tensor_float_range().min();
range_max_ = options_.output_tensor_float_range().max();
if (cc->Inputs().HasTag(kInputCpu)) {
ASSIGN_OR_RETURN(converter_, CreateOpenCvConverter(cc));
} else {
#if MEDIAPIPE_DISABLE_GPU
return mediapipe::UnimplementedError("GPU processing is disabled");
#else
#if MEDIAPIPE_METAL_ENABLED
ASSIGN_OR_RETURN(converter_, CreateMetalConverter(cc));
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
ASSIGN_OR_RETURN(converter_, CreateImageToGlBufferTensorConverter(
cc, DoesInputStartAtBottom()));
#else
ASSIGN_OR_RETURN(converter_, CreateImageToGlTextureTensorConverter(
cc, DoesInputStartAtBottom()));
#endif // MEDIAPIPE_METAL_ENABLED
#endif // MEDIAPIPE_DISABLE_GPU
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status Process(CalculatorContext* cc) {
const InputStreamShard& input = cc->Inputs().Tag(
cc->Inputs().HasTag(kInputCpu) ? kInputCpu : kInputGpu);
if (input.IsEmpty()) {
// Timestamp bound update happens automatically. (See Open().)
return ::mediapipe::OkStatus();
}
absl::optional<mediapipe::NormalizedRect> norm_rect;
if (cc->Inputs().HasTag(kInputNormRect)) {
if (cc->Inputs().Tag(kInputNormRect).IsEmpty()) {
// Timestamp bound update happens automatically. (See Open().)
return ::mediapipe::OkStatus();
}
norm_rect =
cc->Inputs().Tag(kInputNormRect).Get<mediapipe::NormalizedRect>();
if (norm_rect->width() == 0 && norm_rect->height() == 0) {
// WORKAROUND: some existing graphs may use sentinel rects {width=0,
// height=0, ...} quite often and calculator has to handle them
// gracefully by updating timestamp bound instead of returning failure.
// Timestamp bound update happens automatically. (See Open().)
// NOTE: usage of sentinel rects should be avoided.
DLOG(WARNING)
<< "Updating timestamp bound in response to a sentinel rect";
return ::mediapipe::OkStatus();
}
}
const Packet& image_packet = input.Value();
const Size& size = converter_->GetImageSize(image_packet);
RotatedRect roi = GetRoi(size.width, size.height, norm_rect);
ASSIGN_OR_RETURN(auto padding, PadRoi(options_.output_tensor_width(),
options_.output_tensor_height(),
options_.keep_aspect_ratio(), &roi));
if (cc->Outputs().HasTag(kOutputLetterboxPadding)) {
cc->Outputs()
.Tag(kOutputLetterboxPadding)
.AddPacket(MakePacket<std::array<float, 4>>(padding).At(
cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kOutputMatrix)) {
std::array<float, 16> matrix;
GetRotatedSubRectToRectTransformMatrix(roi, size.width, size.height,
/*flip_horizontaly=*/false,
&matrix);
cc->Outputs()
.Tag(kOutputMatrix)
.AddPacket(MakePacket<std::array<float, 16>>(std::move(matrix))
.At(cc->InputTimestamp()));
}
ASSIGN_OR_RETURN(
Tensor tensor,
converter_->Convert(image_packet, roi, {output_width_, output_height_},
range_min_, range_max_));
std::vector<Tensor> result;
result.push_back(std::move(tensor));
cc->Outputs().Tag(kOutput).AddPacket(
MakePacket<std::vector<Tensor>>(std::move(result))
.At(cc->InputTimestamp()));
return ::mediapipe::OkStatus();
}
private:
bool DoesInputStartAtBottom() {
return options_.gpu_origin() != mediapipe::GpuOrigin_Mode_TOP_LEFT;
}
std::unique_ptr<ImageToTensorConverter> converter_;
mediapipe::ImageToTensorCalculatorOptions options_;
int output_width_ = 0;
int output_height_ = 0;
float range_min_ = 0.0f;
float range_max_ = 1.0f;
};
REGISTER_CALCULATOR(ImageToTensorCalculator);
} // namespace mediapipe
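A quick check of the LETTERBOX_PADDING arithmetic described in the calculator comment above, as a standalone sketch. LetterboxPadding() is a hypothetical helper written only for this illustration (it is not the calculator's PadRoi()); it assumes the unused space is split evenly between the two padded sides, as in the 10x10-into-20x40 example.

#include <algorithm>
#include <array>
#include <iostream>

// Hypothetical helper: scale the input to fit the output while keeping its
// aspect ratio, then report the leftover border on each side
// ([left, top, right, bottom]) normalized by the output dimensions.
std::array<float, 4> LetterboxPadding(int in_w, int in_h, int out_w, int out_h) {
  const float scale = std::min(static_cast<float>(out_w) / in_w,
                               static_cast<float>(out_h) / in_h);
  const float pad_x = (out_w - in_w * scale) / 2.0f / out_w;
  const float pad_y = (out_h - in_h * scale) / 2.0f / out_h;
  return {pad_x, pad_y, pad_x, pad_y};
}

int main() {
  // 10x10 input into a 20x40 tensor -> 0 0.25 0 0.25, matching the example
  // in the calculator comment.
  const auto p = LetterboxPadding(10, 10, 20, 40);
  std::cout << p[0] << " " << p[1] << " " << p[2] << " " << p[3] << "\n";
  return 0;
}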

View File

@ -0,0 +1,64 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message GpuOrigin {
enum Mode {
DEFAULT = 0;
// OpenGL: bottom-left origin
// Metal : top-left origin
CONVENTIONAL = 1;
// OpenGL: top-left origin
// Metal : top-left origin
TOP_LEFT = 2;
}
}
message ImageToTensorCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional ImageToTensorCalculatorOptions ext = 334361939;
}
// Range of float values [min, max].
// min must be strictly less than max.
message FloatRange {
optional float min = 1;
optional float max = 2;
}
optional int32 output_tensor_width = 1;
optional int32 output_tensor_height = 2;
// If true, image region will be extracted and copied into tensor keeping
// region aspect ratio, which usually results in letterbox padding. Otherwise,
// if false, image region is stretched to fill output tensor fully.
optional bool keep_aspect_ratio = 3;
// Output tensor element range/type image pixels are converted to.
oneof range {
FloatRange output_tensor_float_range = 4;
}
// For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
// to be flipped vertically as tensors are expected to start at top.
// (DEFAULT or unset interpreted as CONVENTIONAL.)
optional GpuOrigin.Mode gpu_origin = 5;
}

View File

@ -0,0 +1,262 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
cv::Mat GetRgb(absl::string_view path) {
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
cv::Mat rgb;
cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGB);
return rgb;
}
cv::Mat GetRgba(absl::string_view path) {
cv::Mat bgr = cv::imread(file::JoinPath("./", path));
cv::Mat rgb;
cv::cvtColor(bgr, rgb, cv::COLOR_BGR2RGBA);
return rgb;
}
// Image to tensor test template.
// No processing/assertions should be done after the function is invoked.
void RunTest(cv::Mat input, cv::Mat expected_result, float range_min,
float range_max, int tensor_width, int tensor_height,
bool keep_aspect, const mediapipe::NormalizedRect& roi) {
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(R"(
input_stream: "input_image"
input_stream: "roi"
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:input_image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:tensor"
options {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: $0
output_tensor_height: $1
keep_aspect_ratio: $4
output_tensor_float_range {
min: $2
max: $3
}
}
}
}
)",
/*$0=*/tensor_width,
/*$1=*/tensor_height,
/*$2=*/range_min,
/*$3=*/range_max,
/*$4=*/keep_aspect ? "true" : "false"));
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
ImageFrame input_image(
input.channels() == 4 ? ImageFormat::SRGBA : ImageFormat::SRGB,
input.cols, input.rows, input.step, input.data, [](uint8*) {});
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image",
MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
MP_ASSERT_OK(graph.AddPacketToInputStream(
"roi",
MakePacket<mediapipe::NormalizedRect>(std::move(roi)).At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_THAT(output_packets, testing::SizeIs(1));
// Get and process results.
const std::vector<Tensor>& tensor_vec =
output_packets[0].Get<std::vector<Tensor>>();
ASSERT_THAT(tensor_vec, testing::SizeIs(1));
const Tensor& tensor = tensor_vec[0];
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
auto view = tensor.GetCpuReadView();
cv::Mat tensor_mat(tensor_height, tensor_width, CV_32FC3,
const_cast<float*>(view.buffer<float>()));
cv::Mat result_rgb;
auto transformation =
GetValueRangeTransformation(range_min, range_max, 0.0f, 255.0f)
.ValueOrDie();
tensor_mat.convertTo(result_rgb, CV_8UC3, transformation.scale,
transformation.offset);
cv::Mat diff;
cv::absdiff(result_rgb, expected_result, diff);
double max_val;
cv::minMaxLoc(diff, nullptr, &max_val);
// Expects the maximum absolute pixel-by-pixel difference is less than 5.
EXPECT_LE(max_val, 5);
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
MP_ASSERT_OK(graph.CloseInputStream("roi"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(0);
RunTest(
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true, roi);
}
TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * 90.0f / 180.0f);
RunTest(GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation.png"),
/*range_min=*/0.0f, /*range_max=*/1.0f,
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
roi);
}
TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.65f);
roi.set_y_center(0.4f);
roi.set_width(0.5f);
roi.set_height(0.5f);
roi.set_rotation(M_PI * -45.0f / 180.0f);
RunTest(
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"),
/*range_min=*/-1.0f,
/*range_max=*/1.0f,
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false, roi);
}
TEST(ImageToTensorCalculatorTest, LargeSubRect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
RunTest(GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
roi);
}
TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspect) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(0);
RunTest(
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true, roi);
}
TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectWithRotation) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.5f);
roi.set_height(1.1f);
roi.set_rotation(M_PI * -15.0f / 180.0f);
RunTest(GetRgba("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
roi);
}
TEST(ImageToTensorCalculatorTest, NoOpExceptRange) {
mediapipe::NormalizedRect roi;
roi.set_x_center(0.5f);
roi.set_y_center(0.5f);
roi.set_width(1.0f);
roi.set_height(1.0f);
roi.set_rotation(0);
RunTest(GetRgba("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
roi);
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,53 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_H_
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
struct Size {
int width;
int height;
};
// Converts image to tensor.
class ImageToTensorConverter {
public:
virtual ~ImageToTensorConverter() = default;
virtual Size GetImageSize(const Packet& image_packet) = 0;
// Converts image to tensor.
// @image_packet contains image to extract from.
// @roi describes region of interest within the image to extract (absolute
// values).
// @output_dims dimensions of output tensor.
// @range_min/max describes output tensor range image pixels should be converted
// to.
virtual ::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
const RotatedRect& roi,
const Size& output_dims,
float range_min,
float range_max) = 0;
};
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_H_
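The range_min/range_max arguments of Convert() describe a linear remapping of pixel values into the output tensor. A minimal sketch of that mapping, assuming the usual scale/offset form (ValueRangeScaleOffset below is a hypothetical stand-in written for this note, not the GetValueRangeTransformation helper from image_to_tensor_utils):

#include <utility>

// Hypothetical helper: linear map taking values in [from_min, from_max] to
// [to_min, to_max], applied later as value * scale + offset.
std::pair<float, float> ValueRangeScaleOffset(float from_min, float from_max,
                                              float to_min, float to_max) {
  const float scale = (to_max - to_min) / (from_max - from_min);
  const float offset = to_min - from_min * scale;
  return {scale, offset};
}

// Example: mapping 8-bit pixels in [0, 255] into [-1, 1] gives
// scale = 2/255 and offset = -1, so 0 maps to -1 and 255 maps to 1.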

View File

@ -0,0 +1,340 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_buffer.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include <array>
#include <memory>
#include <vector>
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/command_queue.h"
#include "tensorflow/lite/delegates/gpu/gl/converters/util.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h"
#include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h"
#include "tensorflow/lite/delegates/gpu/gl/variable.h"
namespace mediapipe {
namespace {
// Implements a common pattern of extracting a subrect from RGBA input texture
// and resizing it into a buffer.
class SubRectExtractorGl {
public:
// Extracts a region defined by @sub_rect, removes A channel, transforms input
// pixels as alpha * x + beta and resizes result into destination.
::mediapipe::Status ExtractSubRectToBuffer(
const tflite::gpu::gl::GlTexture& texture,
const tflite::gpu::HW& texture_size, const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha, float beta,
const tflite::gpu::HW& destination_size,
tflite::gpu::gl::CommandQueue* command_queue,
tflite::gpu::gl::GlBuffer* destination);
static ::mediapipe::StatusOr<SubRectExtractorGl> Create(
bool input_starts_at_bottom);
private:
explicit SubRectExtractorGl(tflite::gpu::gl::GlProgram program,
tflite::gpu::uint3 workgroup_size)
: program_(std::move(program)), workgroup_size_(workgroup_size) {}
tflite::gpu::gl::GlProgram program_;
tflite::gpu::uint3 workgroup_size_;
};
::mediapipe::Status SetMat4x4(const tflite::gpu::gl::GlProgram& program,
const std::string& name, float* data) {
GLint uniform_id;
MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glGetUniformLocation, &uniform_id,
program.id(), name.c_str()));
return TFLITE_GPU_CALL_GL(glProgramUniformMatrix4fv, program.id(), uniform_id,
1, GL_TRUE, data);
}
class GlParametersOverride {
public:
static ::mediapipe::StatusOr<GlParametersOverride> Create(
const std::vector<std::pair<GLenum, GLint>>& overrides) {
std::vector<GLint> old_values(overrides.size());
for (int i = 0; i < overrides.size(); ++i) {
MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glGetTexParameteriv, GL_TEXTURE_2D,
overrides[i].first,
&old_values[i]));
if (overrides[i].second != old_values[i]) {
MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glTexParameteri, GL_TEXTURE_2D,
overrides[i].first,
overrides[i].second));
}
}
return GlParametersOverride(overrides, std::move(old_values));
}
::mediapipe::Status Revert() {
for (int i = 0; i < overrides_.size(); ++i) {
if (overrides_[i].second != old_values_[i]) {
MP_RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glTexParameteri, GL_TEXTURE_2D,
overrides_[i].first,
old_values_[i]));
}
}
return ::mediapipe::OkStatus();
}
private:
GlParametersOverride(const std::vector<std::pair<GLenum, GLint>>& overrides,
std::vector<GLint> old_values)
: overrides_(overrides), old_values_(std::move(old_values)) {}
std::vector<std::pair<GLenum, GLint>> overrides_;
std::vector<GLint> old_values_;
};
constexpr char kShaderCode[] = R"(
layout(std430) buffer;
precision highp float;
// It is possible to use "vec3 elements[];" here, however due to alignment
// requirements it works only when "packed" layout is used. "packed" layout is
// determined by implementation and it's expected that OpenGL API is used to
// query the layout. Favoring float array over vec3, considering performance is
// comparable, layout is the same and no need for layout querying (even though
// it's not quite needed here as there's only one member).
layout(binding = 0) writeonly buffer B0 {
float elements[];
} output_data;
uniform ivec2 out_size;
uniform float alpha;
uniform float beta;
uniform mat4 transform_matrix;
uniform mediump sampler2D input_data;
void main() {
int out_width = out_size.x;
int out_height = out_size.y;
ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
if (gid.x >= out_width || gid.y >= out_height) {
return;
}
// transform from image.width, image.height range to [0, 1]
float normal_x = (float(gid.x) + 0.5f) / float(out_width);
float normal_y = (float(gid.y) + 0.5f) / float(out_height);
vec4 tc = vec4(normal_x, normal_y, 0.0, 1.0);
// Apply transformation from roi coordinates to original image coordinates.
tc = transform_matrix * tc;
#ifdef INPUT_STARTS_AT_BOTTOM
// OpenGL texture sampler has origin in lower left corner,
// so we invert y coordinate.
tc.y = 1.0f - tc.y;
#endif // INPUT_STARTS_AT_BOTTOM
vec4 src_value = alpha * texture(input_data, tc.xy) + beta;
int linear_index = gid.y * out_width + gid.x;
// output_data.elements is populated as though it contains vec3 elements.
int first_component_index = 3 * linear_index;
output_data.elements[first_component_index] = src_value.r;
output_data.elements[first_component_index + 1] = src_value.g;
output_data.elements[first_component_index + 2] = src_value.b;
}
)";
::mediapipe::Status SubRectExtractorGl::ExtractSubRectToBuffer(
const tflite::gpu::gl::GlTexture& texture,
const tflite::gpu::HW& texture_size, const RotatedRect& texture_sub_rect,
bool flip_horizontaly, float alpha, float beta,
const tflite::gpu::HW& destination_size,
tflite::gpu::gl::CommandQueue* command_queue,
tflite::gpu::gl::GlBuffer* destination) {
std::array<float, 16> transform_mat;
GetRotatedSubRectToRectTransformMatrix(texture_sub_rect, texture_size.w,
texture_size.h, flip_horizontaly,
&transform_mat);
MP_RETURN_IF_ERROR(texture.BindAsSampler2D(0));
ASSIGN_OR_RETURN(auto overrides, GlParametersOverride::Create(
{{GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE},
{GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE},
{GL_TEXTURE_MIN_FILTER, GL_LINEAR},
{GL_TEXTURE_MAG_FILTER, GL_LINEAR}}));
MP_RETURN_IF_ERROR(destination->BindToIndex(0));
MP_RETURN_IF_ERROR(program_.SetParameter({"input_data", 0}));
MP_RETURN_IF_ERROR(
SetMat4x4(program_, "transform_matrix", transform_mat.data()));
MP_RETURN_IF_ERROR(program_.SetParameter(
{"out_size", tflite::gpu::int2(destination_size.w, destination_size.h)}));
MP_RETURN_IF_ERROR(program_.SetParameter({"alpha", alpha}));
MP_RETURN_IF_ERROR(program_.SetParameter({"beta", beta}));
tflite::gpu::uint3 num_workgroups = tflite::gpu::DivideRoundUp(
tflite::gpu::uint3{destination_size.w, destination_size.h, 1},
workgroup_size_);
MP_RETURN_IF_ERROR(command_queue->Dispatch(program_, num_workgroups));
return overrides.Revert();
}
::mediapipe::StatusOr<SubRectExtractorGl> SubRectExtractorGl::Create(
bool input_starts_at_bottom) {
const tflite::gpu::uint3 workgroup_size = {8, 8, 1};
std::string starts_at_bottom_def;
if (input_starts_at_bottom) {
starts_at_bottom_def = R"(
#define INPUT_STARTS_AT_BOTTOM;
)";
}
const std::string full_shader_source =
absl::StrCat(tflite::gpu::gl::GetShaderHeader(workgroup_size),
starts_at_bottom_def, kShaderCode);
tflite::gpu::gl::GlShader shader;
MP_RETURN_IF_ERROR(tflite::gpu::gl::GlShader::CompileShader(
GL_COMPUTE_SHADER, full_shader_source, &shader));
tflite::gpu::gl::GlProgram program;
MP_RETURN_IF_ERROR(
tflite::gpu::gl::GlProgram::CreateWithShader(shader, &program));
return SubRectExtractorGl(std::move(program), workgroup_size);
}
class GlProcessor : public ImageToTensorConverter {
public:
::mediapipe::Status Init(CalculatorContext* cc, bool input_starts_at_bottom) {
MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
return gl_helper_.RunInGlContext(
[this, input_starts_at_bottom]() -> ::mediapipe::Status {
tflite::gpu::GpuInfo gpu_info;
MP_RETURN_IF_ERROR(tflite::gpu::gl::RequestGpuInfo(&gpu_info));
RET_CHECK(tflite::gpu::IsOpenGl31OrAbove(gpu_info))
<< "OpenGL ES 3.1 is required.";
command_queue_ = tflite::gpu::gl::NewCommandQueue(gpu_info);
ASSIGN_OR_RETURN(auto extractor,
SubRectExtractorGl::Create(input_starts_at_bottom));
extractor_ =
absl::make_unique<SubRectExtractorGl>(std::move(extractor));
return ::mediapipe::OkStatus();
});
}
Size GetImageSize(const Packet& image_packet) override {
const auto& image = image_packet.Get<mediapipe::GpuBuffer>();
return {image.width(), image.height()};
}
::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
const RotatedRect& roi,
const Size& output_dims,
float range_min,
float range_max) override {
const auto& input = image_packet.Get<mediapipe::GpuBuffer>();
if (input.format() != mediapipe::GpuBufferFormat::kBGRA32) {
return InvalidArgumentError(
absl::StrCat("Only BGRA/RGBA textures are supported, passed format: ",
static_cast<uint32_t>(input.format())));
}
constexpr int kNumChannels = 3;
Tensor tensor(Tensor::ElementType::kFloat32,
{1, output_dims.height, output_dims.width, kNumChannels});
MP_RETURN_IF_ERROR(gl_helper_.RunInGlContext(
[this, &tensor, &input, &roi, &output_dims, range_min,
range_max]() -> ::mediapipe::Status {
constexpr int kRgbaNumChannels = 4;
auto source_texture = gl_helper_.CreateSourceTexture(input);
tflite::gpu::gl::GlTexture input_texture(
GL_TEXTURE_2D, source_texture.name(), GL_RGBA,
source_texture.width() * source_texture.height() *
kRgbaNumChannels * sizeof(uint8_t),
/*layer=*/0,
/*owned=*/false);
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 1.0f;
ASSIGN_OR_RETURN(auto transform,
GetValueRangeTransformation(kInputImageRangeMin,
kInputImageRangeMax,
range_min, range_max));
auto buffer_view = tensor.GetOpenGlBufferWriteView();
tflite::gpu::gl::GlBuffer output(GL_SHADER_STORAGE_BUFFER,
buffer_view.name(), tensor.bytes(),
/*offset=*/0,
/*has_ownership=*/false);
MP_RETURN_IF_ERROR(extractor_->ExtractSubRectToBuffer(
input_texture,
tflite::gpu::HW(source_texture.height(), source_texture.width()),
roi,
/*flip_horizontaly=*/false, transform.scale, transform.offset,
tflite::gpu::HW(output_dims.height, output_dims.width),
command_queue_.get(), &output));
return ::mediapipe::OkStatus();
}));
return tensor;
}
~GlProcessor() override {
gl_helper_.RunInGlContext([this]() {
// Release OpenGL resources.
extractor_ = nullptr;
command_queue_ = nullptr;
});
}
private:
std::unique_ptr<tflite::gpu::gl::CommandQueue> command_queue_;
std::unique_ptr<SubRectExtractorGl> extractor_;
mediapipe::GlCalculatorHelper gl_helper_;
};
} // namespace
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlBufferTensorConverter(CalculatorContext* cc,
bool input_starts_at_bottom) {
auto result = absl::make_unique<GlProcessor>();
MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom));
// Simply "return std::move(result)" failed to build on macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(std::move(result));
}
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
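The shader above writes a packed height x width x 3 float buffer and indexes it by hand (first_component_index = 3 * linear_index) rather than declaring "vec3 elements[]", for the layout reasons given in its comment. As a hedged illustration of how that buffer lines up with the output tensor, AtHWC below is a hypothetical CPU-side accessor, not part of this change:

// Hypothetical CPU-side view of the buffer the shader fills: row-major
// height x width x 3 floats, so pixel (x, y), channel c lives at
// index 3 * (y * width + x) + c.
float& AtHWC(float* data, int width, int x, int y, int c) {
  return data[3 * (y * width + x) + c];
}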

View File

@ -0,0 +1,41 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_BUFFER_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_BUFFER_H_
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#include <memory>
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
// Creates image to tensor (represented as OpenGL buffer) converter.
// NOTE: mediapipe::GlCalculatorHelper::UpdateContract invocation must precede
// converter creation.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlBufferTensorConverter(CalculatorContext* cc,
bool input_starts_at_bottom);
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_BUFFER_H_

View File

@ -0,0 +1,323 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#include <array>
#include <memory>
#include <vector>
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/shader_util.h"
namespace mediapipe {
namespace {
class GlParametersOverride {
public:
static ::mediapipe::StatusOr<GlParametersOverride> Create(
const std::vector<std::pair<GLenum, GLint>>& overrides) {
std::vector<GLint> old_values(overrides.size());
for (int i = 0; i < overrides.size(); ++i) {
glGetTexParameteriv(GL_TEXTURE_2D, overrides[i].first, &old_values[i]);
if (overrides[i].second != old_values[i]) {
glTexParameteri(GL_TEXTURE_2D, overrides[i].first, overrides[i].second);
}
}
return GlParametersOverride(overrides, std::move(old_values));
}
::mediapipe::Status Revert() {
for (int i = 0; i < overrides_.size(); ++i) {
if (overrides_[i].second != old_values_[i]) {
glTexParameteri(GL_TEXTURE_2D, overrides_[i].first, old_values_[i]);
}
}
return ::mediapipe::OkStatus();
}
private:
GlParametersOverride(const std::vector<std::pair<GLenum, GLint>>& overrides,
std::vector<GLint> old_values)
: overrides_(overrides), old_values_(std::move(old_values)) {}
std::vector<std::pair<GLenum, GLint>> overrides_;
std::vector<GLint> old_values_;
};
constexpr int kAttribVertex = 0;
constexpr int kAttribTexturePosition = 1;
constexpr int kNumAttributes = 2;
class GlProcessor : public ImageToTensorConverter {
public:
::mediapipe::Status Init(CalculatorContext* cc, bool input_starts_at_bottom) {
MP_RETURN_IF_ERROR(gl_helper_.Open(cc));
return gl_helper_.RunInGlContext([this, input_starts_at_bottom]()
-> ::mediapipe::Status {
const GLint attr_location[kNumAttributes] = {
kAttribVertex,
kAttribTexturePosition,
};
const GLchar* attr_name[kNumAttributes] = {
"position",
"texture_coordinate",
};
constexpr GLchar kExtractSubRectVertexShader[] = R"(
in vec4 position;
in mediump vec4 texture_coordinate;
out mediump vec2 sample_coordinate;
uniform mat4 transform_matrix;
void main() {
gl_Position = position;
// Apply transformation from roi coordinates to original image coordinates.
vec4 tc = transform_matrix * texture_coordinate;
#ifdef INPUT_STARTS_AT_BOTTOM
// OpenGL texture sampler has origin in lower left corner,
// so we invert y coordinate.
tc.y = 1.0 - tc.y;
#endif // defined(INPUT_STARTS_AT_BOTTOM)
sample_coordinate = tc.xy;
}
)";
constexpr GLchar kExtractSubRectFragBody[] = R"(
DEFAULT_PRECISION(mediump, float)
// Provided by kExtractSubRectVertexShader.
in vec2 sample_coordinate;
uniform sampler2D input_texture;
uniform float alpha;
uniform float beta;
#ifdef GL_ES
#define fragColor gl_FragColor
#else
out vec4 fragColor;
#endif // defined(GL_ES);
void main() {
fragColor = alpha * texture2D(input_texture, sample_coordinate) + beta;
}
)";
std::string starts_at_bottom_def;
if (input_starts_at_bottom) {
starts_at_bottom_def = R"(
#define INPUT_STARTS_AT_BOTTOM
)";
}
// Create program and set parameters.
const std::string extract_sub_rect_vertex_src =
absl::StrCat(mediapipe::kMediaPipeVertexShaderPreamble,
starts_at_bottom_def, kExtractSubRectVertexShader);
const std::string extract_sub_rect_frag_src = absl::StrCat(
mediapipe::kMediaPipeFragmentShaderPreamble, kExtractSubRectFragBody);
mediapipe::GlhCreateProgram(extract_sub_rect_vertex_src.c_str(),
extract_sub_rect_frag_src.c_str(),
kNumAttributes, &attr_name[0], attr_location,
&program_);
RET_CHECK(program_) << "Problem initializing image to tensor program.";
glUseProgram(program_);
glUniform1i(glGetUniformLocation(program_, "input_texture"), 1);
alpha_id_ = glGetUniformLocation(program_, "alpha");
beta_id_ = glGetUniformLocation(program_, "beta");
matrix_id_ = glGetUniformLocation(program_, "transform_matrix");
glGenFramebuffers(1, &framebuffer_);
// vertex storage
glGenBuffers(2, vbo_);
glGenVertexArrays(1, &vao_);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicSquareVertices),
mediapipe::kBasicSquareVertices, GL_STATIC_DRAW);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
glBufferData(GL_ARRAY_BUFFER, sizeof(mediapipe::kBasicTextureVertices),
mediapipe::kBasicTextureVertices, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
return ::mediapipe::OkStatus();
});
}
Size GetImageSize(const Packet& image_packet) override {
const auto& image = image_packet.Get<mediapipe::GpuBuffer>();
return {image.width(), image.height()};
}
::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
const RotatedRect& roi,
const Size& output_dims,
float range_min,
float range_max) override {
const auto& input = image_packet.Get<mediapipe::GpuBuffer>();
if (input.format() != mediapipe::GpuBufferFormat::kBGRA32) {
return InvalidArgumentError(
absl::StrCat("Only BGRA/RGBA textures are supported, passed format: ",
static_cast<uint32_t>(input.format())));
}
constexpr int kNumChannels = 3;
Tensor tensor(
Tensor::ElementType::kFloat32,
Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});
MP_RETURN_IF_ERROR(gl_helper_.RunInGlContext(
[this, &tensor, &input, &roi, &output_dims, range_min,
range_max]() -> ::mediapipe::Status {
auto input_texture = gl_helper_.CreateSourceTexture(input);
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 1.0f;
ASSIGN_OR_RETURN(auto transform,
GetValueRangeTransformation(kInputImageRangeMin,
kInputImageRangeMax,
range_min, range_max));
auto tensor_view = tensor.GetOpenGlTexture2dWriteView();
MP_RETURN_IF_ERROR(ExtractSubRect(input_texture, roi,
/*flip_horizontaly=*/false,
transform.scale, transform.offset,
output_dims, &tensor_view));
return ::mediapipe::OkStatus();
}));
return tensor;
}
::mediapipe::Status ExtractSubRect(const mediapipe::GlTexture& texture,
const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha,
float beta, const Size& output_dims,
Tensor::OpenGlTexture2dView* output) {
std::array<float, 16> transform_mat;
GetRotatedSubRectToRectTransformMatrix(sub_rect, texture.width(),
texture.height(), flip_horizontaly,
&transform_mat);
glDisable(GL_DEPTH_TEST);
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
glViewport(0, 0, output_dims.width, output_dims.height);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, output->name());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
output->name(), 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(texture.target(), texture.name());
ASSIGN_OR_RETURN(auto overrides, GlParametersOverride::Create(
{{GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE},
{GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE},
{GL_TEXTURE_MIN_FILTER, GL_LINEAR},
{GL_TEXTURE_MAG_FILTER, GL_LINEAR}}));
glUseProgram(program_);
glUniform1f(alpha_id_, alpha);
glUniform1f(beta_id_, beta);
glUniformMatrix4fv(matrix_id_, 1, GL_TRUE, transform_mat.data());
// vao
glBindVertexArray(vao_);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo_[0]);
glEnableVertexAttribArray(kAttribVertex);
glVertexAttribPointer(kAttribVertex, 2, GL_FLOAT, 0, 0, nullptr);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo_[1]);
glEnableVertexAttribArray(kAttribTexturePosition);
glVertexAttribPointer(kAttribTexturePosition, 2, GL_FLOAT, 0, 0, nullptr);
// draw
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// cleanup
glDisableVertexAttribArray(kAttribVertex);
glDisableVertexAttribArray(kAttribTexturePosition);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
return overrides.Revert();
}
~GlProcessor() override {
gl_helper_.RunInGlContext([this]() {
// Release OpenGL resources.
if (framebuffer_ != 0) glDeleteFramebuffers(1, &framebuffer_);
if (program_ != 0) glDeleteProgram(program_);
if (vao_ != 0) glDeleteVertexArrays(1, &vao_);
glDeleteBuffers(2, vbo_);
});
}
private:
mediapipe::GlCalculatorHelper gl_helper_;
GLuint vao_ = 0;
GLuint vbo_[2] = {0, 0};
GLuint program_ = 0;
GLuint framebuffer_ = 0;
GLint alpha_id_ = 0;
GLint beta_id_ = 0;
GLint matrix_id_ = 0;
};
} // namespace
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
bool input_starts_at_bottom) {
auto result = absl::make_unique<GlProcessor>();
MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom));
// Simply "return std::move(result)" failed to build on macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(std::move(result));
}
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20

View File

@ -0,0 +1,42 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#include <memory>
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
// Creates image to tensor (represented as OpenGL texture) converter.
// NOTE: mediapipe::GlCalculatorHelper::UpdateContract invocation must precede
// converter creation.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
bool input_starts_at_bottom);
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_

View File

@ -0,0 +1,397 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/image_to_tensor_converter_metal.h"
#if MEDIAPIPE_METAL_ENABLED
#import <Metal/Metal.h>
#include <array>
#include <memory>
#include <vector>
#include "absl/strings/str_cat.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/MPPMetalHelper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
namespace mediapipe {
namespace {
// clang-format off
// a square formed by 2 triangles
const float kBasicSquareVertices[] = {
-1, 1, 0, 1,
1, 1, 0, 1,
1, -1, 0, 1,
-1, 1, 0, 1,
1, -1, 0, 1,
-1, -1, 0, 1,
};
// maps a texture to kBasicSquareVertices via aspect fill
const float kBasicTextureVertices[] = {
0, 0, 0, 1,
1, 0, 0, 1,
1, 1, 0, 1,
0, 0, 0, 1,
1, 1, 0, 1,
0, 1, 0, 1,
};
// clang-format on
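// Each vertex above is an (x, y, z, w) tuple; the six vertices form two
// triangles covering the full clip-space quad, and kBasicTextureVertices
// supplies the matching [0, 1] texture coordinates.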
constexpr char kShaderLibHeader[] = R"(
#include <metal_stdlib>
using namespace metal;
struct TextureVertex
{
float4 position [[position]];
float2 uv;
};
)";
constexpr char kVertexShader[] = R"(
vertex TextureVertex vertexShader(
constant float4 *position [[buffer(0)]],
device float4* tex_coords [[buffer(1)]],
constant float4x4& transform_matrix [[buffer(2)]],
uint vid [[vertex_id]]) {
TextureVertex vert;
vert.position = position[vid];
vert.uv = (tex_coords[vid] * transform_matrix).xy;
return vert;
}
)";
constexpr char kFragmentShader[] = R"(
#ifdef OUTPUT_F16C4
#define Type4 half4
#define Type half
#endif // OUTPUT_F16C4
#ifdef OUTPUT_F32C4
#define Type4 float4
#define Type float
#endif // OUTPUT_F32C4
fragment Type4 fragmentShader(TextureVertex vertex_output [[stage_in]],
texture2d<Type> texture [[texture(0)]],
constant float* parameters [[buffer(1)]])
{
const float alpha = parameters[0];
const float beta = parameters[1];
constexpr sampler linear_sampler(address::clamp_to_edge, min_filter::linear,
mag_filter::linear);
Type4 texture_pixel = texture.sample(linear_sampler, vertex_output.uv);
return Type4(alpha * texture_pixel.rgb + beta, 0);
}
)";
enum class OutputFormat { kF16C4, kF32C4 };
MTLPixelFormat GetPixelFormat(OutputFormat output_format) {
switch (output_format) {
case OutputFormat::kF16C4:
return MTLPixelFormatRGBA16Float;
case OutputFormat::kF32C4:
return MTLPixelFormatRGBA32Float;
}
}
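// Returns the number of bytes per row of the destination buffer:
// width * 4 channels * bytes per element for the given output format.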
int GetBytesPerRaw(OutputFormat output_format, const tflite::gpu::HW& size) {
std::size_t type_size;
switch (output_format) {
case OutputFormat::kF16C4:
type_size = sizeof(tflite::gpu::HalfBits);
break;
case OutputFormat::kF32C4:
type_size = sizeof(float);
break;
}
constexpr int kNumChannels = 4;
return size.w * kNumChannels * type_size;
}
class SubRectExtractorMetal {
public:
static ::mediapipe::StatusOr<std::unique_ptr<SubRectExtractorMetal>> Make(
id<MTLDevice> device, OutputFormat output_format) {
id<MTLRenderPipelineState> pipeline_state;
MP_RETURN_IF_ERROR(SubRectExtractorMetal::MakePipelineState(
device, output_format, &pipeline_state));
return absl::make_unique<SubRectExtractorMetal>(device, pipeline_state,
output_format);
}
SubRectExtractorMetal(id<MTLDevice> device,
id<MTLRenderPipelineState> pipeline_state,
OutputFormat output_format)
: device_(device),
pipeline_state_(pipeline_state),
output_format_(output_format) {
positions_buffer_ =
[device_ newBufferWithBytes:kBasicSquareVertices
length:sizeof(kBasicSquareVertices)
options:MTLResourceOptionCPUCacheModeDefault];
tex_coords_buffer_ =
[device_ newBufferWithBytes:kBasicTextureVertices
length:sizeof(kBasicTextureVertices)
options:MTLResourceOptionCPUCacheModeDefault];
transform_mat_buffer_ =
[device_ newBufferWithBytes:&transform_mat_
length:sizeof(transform_mat_)
options:MTLResourceOptionCPUCacheModeDefault];
}
::mediapipe::Status Execute(id<MTLTexture> input_texture,
const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha, float beta,
const tflite::gpu::HW& destination_size,
id<MTLCommandBuffer> command_buffer,
id<MTLBuffer> destination) {
auto output_texture = MTLTextureWithBuffer(destination_size, destination);
return InternalExecute(input_texture, sub_rect, flip_horizontaly, alpha,
beta, destination_size, command_buffer,
output_texture);
}
private:
id<MTLTexture> MTLTextureWithBuffer(const tflite::gpu::HW& size,
id<MTLBuffer> buffer) {
MTLTextureDescriptor* texture_desc = [MTLTextureDescriptor
texture2DDescriptorWithPixelFormat:GetPixelFormat(output_format_)
width:size.w
height:size.h
mipmapped:NO];
texture_desc.usage = MTLTextureUsageRenderTarget;
NSUInteger output_bytes_per_row = GetBytesPerRaw(output_format_, size);
id<MTLTexture> texture =
[buffer newTextureWithDescriptor:texture_desc
offset:0
bytesPerRow:output_bytes_per_row];
return texture;
}
::mediapipe::Status InternalExecute(id<MTLTexture> input_texture,
const RotatedRect& sub_rect,
bool flip_horizontaly, float alpha,
float beta,
const tflite::gpu::HW& destination_size,
id<MTLCommandBuffer> command_buffer,
id<MTLTexture> output_texture) {
RET_CHECK(command_buffer != nil);
RET_CHECK(output_texture != nil);
// Obtain the texture-coordinate transformation matrix and copy its data
// to the buffer.
GetRotatedSubRectToRectTransformMatrix(sub_rect, input_texture.width,
input_texture.height,
flip_horizontaly, &transform_mat_);
std::memcpy(reinterpret_cast<float*>(transform_mat_buffer_.contents),
transform_mat_.data(), sizeof(transform_mat_));
// Create parameters wrapper.
float parameters[] = {alpha, beta};
// Now everything is ready to go!
// Setup render pass.
MTLRenderPassDescriptor* render_pass_desc =
[MTLRenderPassDescriptor renderPassDescriptor];
render_pass_desc.colorAttachments[0].texture = output_texture;
render_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore;
render_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear;
// Setup render command encoder.
id<MTLRenderCommandEncoder> command_encoder =
[command_buffer renderCommandEncoderWithDescriptor:render_pass_desc];
[command_encoder setRenderPipelineState:pipeline_state_];
[command_encoder setVertexBuffer:positions_buffer_ offset:0 atIndex:0];
[command_encoder setVertexBuffer:tex_coords_buffer_ offset:0 atIndex:1];
[command_encoder setVertexBuffer:transform_mat_buffer_ offset:0 atIndex:2];
[command_encoder setFragmentTexture:input_texture atIndex:0];
[command_encoder setFragmentBytes:&parameters
length:sizeof(parameters)
atIndex:1];
[command_encoder drawPrimitives:MTLPrimitiveTypeTriangle
vertexStart:0
vertexCount:6];
[command_encoder endEncoding];
return ::mediapipe::OkStatus();
}
static ::mediapipe::Status MakePipelineState(
id<MTLDevice> device, OutputFormat output_format,
id<MTLRenderPipelineState>* pipeline_state) {
RET_CHECK(pipeline_state != nil);
std::string output_type_def;
MTLPixelFormat pixel_format;
switch (output_format) {
case OutputFormat::kF16C4:
output_type_def = R"(
#define OUTPUT_F16C4
)";
break;
case OutputFormat::kF32C4:
output_type_def = R"(
#define OUTPUT_F32C4
)";
break;
}
std::string shader_lib = absl::StrCat(kShaderLibHeader, output_type_def,
kVertexShader, kFragmentShader);
NSError* error = nil;
NSString* library_source =
[NSString stringWithUTF8String:shader_lib.c_str()];
id<MTLLibrary> library =
[device newLibraryWithSource:library_source options:nil error:&error];
RET_CHECK(library != nil) << "Couldn't create a shader library"
<< [[error localizedDescription] UTF8String];
id<MTLFunction> vertex_function =
[library newFunctionWithName:@"vertexShader"];
RET_CHECK(vertex_function != nil)
<< "Failed creating a new vertex function!";
id<MTLFunction> fragment_function =
[library newFunctionWithName:@"fragmentShader"];
RET_CHECK(fragment_function != nil)
<< "Failed creating a new fragment function!";
MTLRenderPipelineDescriptor* pipelineDescriptor =
[MTLRenderPipelineDescriptor new];
pipelineDescriptor.vertexFunction = vertex_function;
pipelineDescriptor.fragmentFunction = fragment_function;
pipelineDescriptor.colorAttachments[0].pixelFormat =
GetPixelFormat(output_format);
*pipeline_state =
[device newRenderPipelineStateWithDescriptor:pipelineDescriptor
error:&error];
RET_CHECK(error == nil) << "Couldn't create a pipeline state"
<< [[error localizedDescription] UTF8String];
return ::mediapipe::OkStatus();
}
id<MTLBuffer> positions_buffer_;
id<MTLBuffer> tex_coords_buffer_;
id<MTLBuffer> transform_mat_buffer_;
id<MTLDevice> device_;
id<MTLRenderPipelineState> pipeline_state_;
std::array<float, 16> transform_mat_;
OutputFormat output_format_;
};
class MetalProcessor : public ImageToTensorConverter {
public:
::mediapipe::Status Init(CalculatorContext* cc) {
metal_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(metal_helper_);
ASSIGN_OR_RETURN(extractor_,
SubRectExtractorMetal::Make(metal_helper_.mtlDevice,
OutputFormat::kF32C4));
return ::mediapipe::OkStatus();
}
Size GetImageSize(const Packet& image_packet) override {
const auto& image = image_packet.Get<mediapipe::GpuBuffer>();
return {image.width(), image.height()};
}
::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
const RotatedRect& roi,
const Size& output_dims,
float range_min,
float range_max) override {
const auto& input = image_packet.Get<mediapipe::GpuBuffer>();
if (input.format() != mediapipe::GpuBufferFormat::kBGRA32) {
return InvalidArgumentError(
absl::StrCat("Only BGRA/RGBA textures are supported, passed "
"format: ",
static_cast<uint32_t>(input.format())));
}
@autoreleasepool {
id<MTLTexture> texture = [metal_helper_ metalTextureWithGpuBuffer:input];
constexpr int kNumChannels = 4;
Tensor tensor(Tensor::ElementType::kFloat32,
Tensor::Shape{1, output_dims.height, output_dims.width,
kNumChannels});
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 1.0f;
ASSIGN_OR_RETURN(
auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
id<MTLCommandBuffer> command_buffer = [metal_helper_ commandBuffer];
const auto& buffer_view = tensor.GetMtlBufferWriteView(command_buffer);
MP_RETURN_IF_ERROR(extractor_->Execute(
texture, roi,
/*flip_horizontaly=*/false, transform.scale, transform.offset,
tflite::gpu::HW(output_dims.height, output_dims.width),
command_buffer, buffer_view.buffer()));
[command_buffer commit];
// TODO: consider removing waitUntilCompleted
[command_buffer waitUntilCompleted];
return tensor;
}
}
private:
MPPMetalHelper* metal_helper_ = nil;
std::unique_ptr<SubRectExtractorMetal> extractor_;
};
} // namespace
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateMetalConverter(CalculatorContext* cc) {
auto result = absl::make_unique<MetalProcessor>();
MP_RETURN_IF_ERROR(result->Init(cc));
// Simply "return std::move(result)" failed to build on macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(std::move(result));
}
} // namespace mediapipe
#endif // MEDIAPIPE_METAL_ENABLED

View File

@ -0,0 +1,40 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_METAL_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_METAL_H_
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_METAL_ENABLED
#include <memory>
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
// Creates Metal image-to-tensor converter.
// NOTE: [MPPMetalHelper updateContract:...] invocation must precede
// converter creation.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateMetalConverter(CalculatorContext* cc);
} // namespace mediapipe
#endif // MEDIAPIPE_METAL_ENABLED
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_METAL_H_

View File

@ -0,0 +1,116 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
#include <cmath>
#include <memory>
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
namespace {
class OpenCvProcessor : public ImageToTensorConverter {
public:
Size GetImageSize(const Packet& image_packet) override {
const auto& image = image_packet.Get<mediapipe::ImageFrame>();
return {image.Width(), image.Height()};
}
::mediapipe::StatusOr<Tensor> Convert(const Packet& image_packet,
const RotatedRect& roi,
const Size& output_dims,
float range_min,
float range_max) override {
const auto& input = image_packet.Get<mediapipe::ImageFrame>();
if (input.Format() != mediapipe::ImageFormat::SRGB &&
input.Format() != mediapipe::ImageFormat::SRGBA) {
return InvalidArgumentError(
absl::StrCat("Only RGBA/RGB formats are supported, passed format: ",
static_cast<uint32_t>(input.Format())));
}
cv::Mat src = mediapipe::formats::MatView(&input);
constexpr int kNumChannels = 3;
Tensor tensor(
Tensor::ElementType::kFloat32,
Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});
auto buffer_view = tensor.GetCpuWriteView();
cv::Mat dst(output_dims.height, output_dims.width, CV_32FC3,
buffer_view.buffer<float>());
const cv::RotatedRect rotated_rect(cv::Point2f(roi.center_x, roi.center_y),
cv::Size2f(roi.width, roi.height),
roi.rotation * 180.f / M_PI);
cv::Mat src_points;
cv::boxPoints(rotated_rect, src_points);
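// getPerspectiveTransform maps src_points[i] to dst_points[i], so
// dst_corners below lists the destination corners in the same order that
// boxPoints emits them (bottom-left, top-left, top-right, bottom-right).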
const float dst_width = output_dims.width;
const float dst_height = output_dims.height;
/* clang-format off */
float dst_corners[8] = {0.0f, dst_height,
0.0f, 0.0f,
dst_width, 0.0f,
dst_width, dst_height};
/* clang-format on */
cv::Mat dst_points = cv::Mat(4, 2, CV_32F, dst_corners);
cv::Mat projection_matrix =
cv::getPerspectiveTransform(src_points, dst_points);
cv::Mat transformed;
cv::warpPerspective(src, transformed, projection_matrix,
cv::Size(dst_width, dst_height),
/*flags=*/cv::INTER_LINEAR,
/*borderMode=*/cv::BORDER_REPLICATE);
if (transformed.channels() > kNumChannels) {
cv::Mat proper_channels_mat;
cv::cvtColor(transformed, proper_channels_mat, cv::COLOR_RGBA2RGB);
transformed = proper_channels_mat;
}
constexpr float kInputImageRangeMin = 0.0f;
constexpr float kInputImageRangeMax = 255.0f;
ASSIGN_OR_RETURN(
auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset);
return tensor;
}
};
} // namespace
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateOpenCvConverter(CalculatorContext* cc) {
// Simply "return absl::make_unique<OpenCvProcessor>()" failed to build on
// macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(
absl::make_unique<OpenCvProcessor>());
}
} // namespace mediapipe

View File

@ -0,0 +1,32 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_OPENCV_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_OPENCV_H_
#include <memory>
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
// Creates OpenCV image-to-tensor converter.
::mediapipe::StatusOr<std::unique_ptr<ImageToTensorConverter>>
CreateOpenCvConverter(CalculatorContext* cc);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_OPENCV_H_

View File

@ -0,0 +1,176 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include <array>
#include "absl/types/optional.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
RotatedRect GetRoi(int input_width, int input_height,
absl::optional<mediapipe::NormalizedRect> norm_rect) {
if (norm_rect) {
return {.center_x = norm_rect->x_center() * input_width,
.center_y = norm_rect->y_center() * input_height,
.width = norm_rect->width() * input_width,
.height = norm_rect->height() * input_height,
.rotation = norm_rect->rotation()};
}
return {.center_x = 0.5f * input_width,
.center_y = 0.5f * input_height,
.width = static_cast<float>(input_width),
.height = static_cast<float>(input_height),
.rotation = 0};
}
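// GetRoi example: without a norm_rect, a 4x4 input yields a 4x4 ROI
// centered at (2, 2) with zero rotation (see the GetRoi tests).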
::mediapipe::StatusOr<std::array<float, 4>> PadRoi(int input_tensor_width,
int input_tensor_height,
bool keep_aspect_ratio,
RotatedRect* roi) {
if (!keep_aspect_ratio) {
return std::array<float, 4>{0.0f, 0.0f, 0.0f, 0.0f};
}
RET_CHECK(input_tensor_width > 0 && input_tensor_height > 0)
<< "Input tensor width and height must be > 0.";
const float tensor_aspect_ratio =
static_cast<float>(input_tensor_height) / input_tensor_width;
RET_CHECK(roi->width > 0 && roi->height > 0)
<< "ROI width and height must be > 0.";
const float roi_aspect_ratio = roi->height / roi->width;
float vertical_padding = 0.0f;
float horizontal_padding = 0.0f;
float new_width;
float new_height;
if (tensor_aspect_ratio > roi_aspect_ratio) {
new_width = roi->width;
new_height = roi->width * tensor_aspect_ratio;
vertical_padding = (1.0f - roi_aspect_ratio / tensor_aspect_ratio) / 2.0f;
} else {
new_width = roi->height / tensor_aspect_ratio;
new_height = roi->height;
horizontal_padding = (1.0f - tensor_aspect_ratio / roi_aspect_ratio) / 2.0f;
}
roi->width = new_width;
roi->height = new_height;
return std::array<float, 4>{horizontal_padding, vertical_padding,
horizontal_padding, vertical_padding};
}
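// PadRoi example: a square tensor (aspect ratio 1) with a 100x200 ROI takes
// the else-branch above: the ROI widens to 200x200 and the returned padding
// is {0.25, 0, 0.25, 0}, i.e. a quarter of the width on each side.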
::mediapipe::StatusOr<ValueTransformation> GetValueRangeTransformation(
float from_range_min, float from_range_max, float to_range_min,
float to_range_max) {
RET_CHECK_LT(from_range_min, from_range_max)
<< "Invalid FROM range: min >= max.";
RET_CHECK_LT(to_range_min, to_range_max) << "Invalid TO range: min >= max.";
const float scale =
(to_range_max - to_range_min) / (from_range_max - from_range_min);
const float offset = to_range_min - from_range_min * scale;
return ValueTransformation{scale, offset};
}
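// GetValueRangeTransformation example: mapping pixel values [0, 255] to
// [-1, 1] yields scale = 2 / 255 and offset = -1.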
void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
int rect_width, int rect_height,
bool flip_horizontaly,
std::array<float, 16>* matrix_ptr) {
std::array<float, 16>& matrix = *matrix_ptr;
// The resulting matrix is multiplication of below commented out matrices:
// post_scale_matrix
// * translate_matrix
// * rotate_matrix
// * flip_matrix
// * scale_matrix
// * initial_translate_matrix
// Matrix to convert X,Y to [-0.5, 0.5] range "initial_translate_matrix"
// { 1.0f, 0.0f, 0.0f, -0.5f}
// { 0.0f, 1.0f, 0.0f, -0.5f}
// { 0.0f, 0.0f, 1.0f, 0.0f}
// { 0.0f, 0.0f, 0.0f, 1.0f}
const float a = sub_rect.width;
const float b = sub_rect.height;
// Matrix to scale X,Y,Z to sub rect "scale_matrix"
// Z has the same scale as X.
// { a, 0.0f, 0.0f, 0.0f}
// {0.0f, b, 0.0f, 0.0f}
// {0.0f, 0.0f, a, 0.0f}
// {0.0f, 0.0f, 0.0f, 1.0f}
const float flip = flip_horizontaly ? -1 : 1;
// Matrix for optional horizontal flip around middle of output image.
// { fl , 0.0f, 0.0f, 0.0f}
// { 0.0f, 1.0f, 0.0f, 0.0f}
// { 0.0f, 0.0f, 1.0f, 0.0f}
// { 0.0f, 0.0f, 0.0f, 1.0f}
const float c = std::cos(sub_rect.rotation);
const float d = std::sin(sub_rect.rotation);
// Matrix to do rotation around Z axis "rotate_matrix"
// { c, -d, 0.0f, 0.0f}
// { d, c, 0.0f, 0.0f}
// { 0.0f, 0.0f, 1.0f, 0.0f}
// { 0.0f, 0.0f, 0.0f, 1.0f}
const float e = sub_rect.center_x;
const float f = sub_rect.center_y;
// Matrix to do X,Y translation of sub rect within parent rect
// "translate_matrix"
// {1.0f, 0.0f, 0.0f, e }
// {0.0f, 1.0f, 0.0f, f }
// {0.0f, 0.0f, 1.0f, 0.0f}
// {0.0f, 0.0f, 0.0f, 1.0f}
const float g = 1.0f / rect_width;
const float h = 1.0f / rect_height;
// Matrix to scale X,Y,Z to [0.0, 1.0] range "post_scale_matrix"
// {g, 0.0f, 0.0f, 0.0f}
// {0.0f, h, 0.0f, 0.0f}
// {0.0f, 0.0f, g, 0.0f}
// {0.0f, 0.0f, 0.0f, 1.0f}
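// Sanity check: for a sub rect covering the whole parent rect (centered,
// unrotated, not flipped) the combined transform below reduces to the
// identity matrix.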
// row 1
matrix[0] = a * c * flip * g;
matrix[1] = -b * d * g;
matrix[2] = 0.0f;
matrix[3] = (-0.5f * a * c * flip + 0.5f * b * d + e) * g;
// row 2
matrix[4] = a * d * flip * h;
matrix[5] = b * c * h;
matrix[6] = 0.0f;
matrix[7] = (-0.5f * b * c - 0.5f * a * d * flip + f) * h;
// row 3
matrix[8] = 0.0f;
matrix[9] = 0.0f;
matrix[10] = a * g;
matrix[11] = 0.0f;
// row 4
matrix[12] = 0.0f;
matrix[13] = 0.0f;
matrix[14] = 0.0f;
matrix[15] = 1.0f;
}
}
} // namespace mediapipe

View File

@ -0,0 +1,82 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_
#include <array>
#include "absl/types/optional.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
struct RotatedRect {
float center_x;
float center_y;
float width;
float height;
float rotation;
};
// Generates a new ROI or converts it from normalized rect.
RotatedRect GetRoi(int input_width, int input_height,
absl::optional<mediapipe::NormalizedRect> norm_rect);
// Pads the ROI so that extraction preserves the requested aspect ratio when
// keep_aspect_ratio is set. Returns the letterbox padding applied.
::mediapipe::StatusOr<std::array<float, 4>> PadRoi(int input_tensor_width,
int input_tensor_height,
bool keep_aspect_ratio,
RotatedRect* roi);
// Represents a transformation of value which involves scaling and offsetting.
// To apply transformation:
// ValueTransformation transform = ...
// float transformed_value = transform.scale * value + transform.offset;
struct ValueTransformation {
float scale;
float offset;
};
// Returns value transformation to apply to a value in order to convert it from
// [from_range_min, from_range_max] into [to_range_min, to_range_max] range.
// from_range_min must be less than from_range_max
// to_range_min must be less than to_range_max
::mediapipe::StatusOr<ValueTransformation> GetValueRangeTransformation(
float from_range_min, float from_range_max, float to_range_min,
float to_range_max);
// Populates 4x4 "matrix" with row major order transformation matrix which
// maps (x, y) in range [0, 1] (describing points of @sub_rect)
// to (x', y') in range [0, 1]*** (describing points of a rect:
// [0, @rect_width] x [0, @rect_height] = RECT).
//
// *** (x', y') will go out of the range for points from @sub_rect
// which are not contained by RECT; this is expected behavior.
//
// @sub_rect - rotated sub rect in absolute coordinates
// @rect_width - rect width
// @rect_height - rect height
// @flip_horizontaly - whether to flip the output buffer horizontally.
// @matrix - 4x4 matrix (array of 16 elements) to populate
void GetRotatedSubRectToRectTransformMatrix(const RotatedRect& sub_rect,
int rect_width, int rect_height,
bool flip_horizontaly,
std::array<float, 16>* matrix);
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_

View File

@ -0,0 +1,161 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
testing::Matcher<RotatedRect> EqRotatedRect(float width, float height,
float center_x, float center_y,
float rotation) {
return testing::AllOf(
testing::Field(&RotatedRect::width, testing::FloatEq(width)),
testing::Field(&RotatedRect::height, testing::FloatEq(height)),
testing::Field(&RotatedRect::center_x, testing::FloatEq(center_x)),
testing::Field(&RotatedRect::center_y, testing::FloatEq(center_y)),
testing::Field(&RotatedRect::rotation, testing::FloatEq(rotation)));
}
TEST(GetRoi, NoNormRect) {
EXPECT_THAT(GetRoi(4, 4, {}), EqRotatedRect(4, 4, 2, 2, 0));
EXPECT_THAT(GetRoi(25, 15, {}), EqRotatedRect(25, 15, 12.5f, 7.5f, 0));
}
TEST(GetRoi, WholeImageNormRect) {
mediapipe::NormalizedRect norm_rect;
norm_rect.set_width(1.0f);
norm_rect.set_height(1.0f);
norm_rect.set_x_center(0.5f);
norm_rect.set_y_center(0.5f);
norm_rect.set_rotation(0.0f);
EXPECT_THAT(GetRoi(4, 4, norm_rect), EqRotatedRect(4, 4, 2, 2, 0));
EXPECT_THAT(GetRoi(25, 15, norm_rect), EqRotatedRect(25, 15, 12.5f, 7.5f, 0));
}
TEST(GetRoi, ExpandedNormRect) {
mediapipe::NormalizedRect norm_rect;
norm_rect.set_width(4.0f);
norm_rect.set_height(2.0f);
norm_rect.set_x_center(0.5f);
norm_rect.set_y_center(1.0f);
norm_rect.set_rotation(3.0f);
EXPECT_THAT(GetRoi(4, 4, norm_rect), EqRotatedRect(16, 8, 2, 4, 3));
EXPECT_THAT(GetRoi(25, 15, norm_rect), EqRotatedRect(100, 30, 12.5f, 15, 3));
}
TEST(PadRoi, NoPadding) {
RotatedRect roi{.center_x = 20,
.center_y = 10,
.width = 100,
.height = 200,
.rotation = 5};
auto status_or_value = PadRoi(10, 10, /*keep_aspect_ratio=*/false, &roi);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
ElementsAreArray({0.0f, 0.0f, 0.0f, 0.0f}));
EXPECT_THAT(roi, EqRotatedRect(100, 200, 20, 10, 5));
}
TEST(PadRoi, HorizontalPadding) {
RotatedRect roi{.center_x = 20,
.center_y = 10,
.width = 100,
.height = 200,
.rotation = 5};
auto status_or_value = PadRoi(10, 10, /*keep_aspect_ratio=*/true, &roi);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
ElementsAreArray({0.25f, 0.0f, 0.25f, 0.0f}));
EXPECT_THAT(roi, EqRotatedRect(200, 200, 20, 10, 5));
}
TEST(PadRoi, VerticalPadding) {
RotatedRect roi{
.center_x = 1, .center_y = 2, .width = 21, .height = 19, .rotation = 3};
const float expected_horizontal_padding = (21 - 19) / 2.0f / 21;
auto status_or_value = PadRoi(10, 10, /*keep_aspect_ratio=*/true, &roi);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(
status_or_value.ValueOrDie(),
ElementsAre(testing::FloatEq(0.0f),
testing::FloatNear(expected_horizontal_padding, 1e-6),
testing::FloatEq(0.0f),
testing::FloatNear(expected_horizontal_padding, 1e-6)));
EXPECT_THAT(roi, EqRotatedRect(21, 21, 1, 2, 3));
}
testing::Matcher<ValueTransformation> EqValueTransformation(float scale,
float offset) {
return ::testing::AllOf(
testing::Field(&ValueTransformation::scale, testing::FloatEq(scale)),
testing::Field(&ValueTransformation::offset, testing::FloatEq(offset)));
}
TEST(GetValueRangeTransformation, PixelToFloatZeroCenter) {
auto status_or_value = GetValueRangeTransformation(
/*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
/*to_range_min=*/-1.0f, /*to_range_max=*/1.0f);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
EqValueTransformation(/*scale=*/2 / 255.0f,
/*offset=*/-1.0f));
}
TEST(GetValueRangeTransformation, PixelToFloat) {
auto status_or_value = GetValueRangeTransformation(
/*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
/*to_range_min=*/0.0f, /*to_range_max=*/1.0f);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
EqValueTransformation(/*scale=*/1 / 255.0f,
/*offset=*/0.0f));
}
TEST(GetValueRangeTransformation, FloatToFloatNoOp) {
auto status_or_value = GetValueRangeTransformation(
/*from_range_min=*/0.0f, /*from_range_max=*/1.0f,
/*to_range_min=*/0.0f, /*to_range_max=*/1.0f);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
EqValueTransformation(/*scale=*/1.0f, /*offset=*/0.0f));
}
TEST(GetValueRangeTransformation, PixelToPixelNoOp) {
auto status_or_value = GetValueRangeTransformation(
/*from_range_min=*/0.0f, /*from_range_max=*/255.0f,
/*to_range_min=*/0.0f, /*to_range_max=*/255.0f);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
EqValueTransformation(/*scale=*/1.0f, /*offset=*/0.0f));
}
TEST(GetValueRangeTransformation, FloatToPixel) {
auto status_or_value = GetValueRangeTransformation(
/*from_range_min=*/0.0f, /*from_range_max=*/1.0f,
/*to_range_min=*/0.0f, /*to_range_max=*/255.0f);
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(),
EqValueTransformation(/*scale=*/255.0f, /*offset=*/0.0f));
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,832 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include "absl/memory/memory.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/tflite/config.h"
#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
#include "mediapipe/util/cpu_util.h"
#endif // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
#include "mediapipe/util/resource_util.h"
#include "tensorflow/lite/error_reporter.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#if defined(MEDIAPIPE_ANDROID)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/filesystem.h"
#include "mediapipe/util/android/file/base/helpers.h"
#endif // ANDROID
#if MEDIAPIPE_TFLITE_GL_INFERENCE
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/util/tflite/tflite_gpu_runner.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
#if MEDIAPIPE_TFLITE_METAL_INFERENCE
#import <CoreVideo/CoreVideo.h>
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>
#import "mediapipe/gpu/MPPMetalHelper.h"
#include "mediapipe/gpu/MPPMetalUtil.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/metal/buffer_convert.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h"
#endif // MEDIAPIPE_TFLITE_METAL_INFERENCE
#if !defined(MEDIAPIPE_EDGE_TPU)
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#endif // !EDGETPU
#if defined(MEDIAPIPE_ANDROID)
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#endif // ANDROID
namespace {
// Commonly used to compute the number of blocks to launch in a kernel.
int NumGroups(const int size, const int group_size) { // NOLINT
return (size + group_size - 1) / group_size;
}
// Round up n to next multiple of m.
template <typename T>
T RoundUp(T n, T m) {
return ((n + m - T{1}) / m) * m;
}
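// For example, NumGroups(10, 4) == 3 and RoundUp(10, 4) == 12.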
bool ShouldUseGpu(const mediapipe::InferenceCalculatorOptions& options) {
return (
!options.has_delegate() || // Use GPU delegate if delegate not specified
(options.has_delegate() && options.delegate().has_gpu()));
}
constexpr char kTensorsTag[] = "TENSORS";
} // namespace
#if defined(MEDIAPIPE_EDGE_TPU)
#include "edgetpu.h"
// Creates and returns an Edge TPU interpreter to run the given edgetpu model.
std::unique_ptr<tflite::Interpreter> BuildEdgeTpuInterpreter(
const tflite::FlatBufferModel& model,
tflite::ops::builtin::BuiltinOpResolver* resolver,
edgetpu::EdgeTpuContext* edgetpu_context) {
resolver->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
std::unique_ptr<tflite::Interpreter> interpreter;
if (tflite::InterpreterBuilder(model, *resolver)(&interpreter) != kTfLiteOk) {
std::cerr << "Failed to build edge TPU interpreter." << std::endl;
}
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context);
interpreter->SetNumThreads(1);
if (interpreter->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate edge TPU tensors." << std::endl;
}
return interpreter;
}
#endif // MEDIAPIPE_EDGE_TPU
namespace mediapipe {
#if MEDIAPIPE_TFLITE_METAL_INFERENCE
namespace {
tflite::gpu::BHWC BhwcFromTensorShape(const Tensor::Shape& shape) {
tflite::gpu::BHWC result;
result.b = shape.dims[0];
switch (shape.dims.size()) {
case 1:
// result.b is already filled.
break;
case 2:
result.h = 1;
result.w = 1;
result.c = shape.dims[1];
break;
case 3:
result.h = 1;
result.w = shape.dims[1];
result.c = shape.dims[2];
break;
case 4:
result.h = shape.dims[1];
result.w = shape.dims[2];
result.c = shape.dims[3];
break;
default:
// Handles 0 and >4.
LOG(FATAL)
<< "Dimensions size must be in range [1,4] for GPU inference, but "
<< shape.dims.size() << " is provided";
}
return result;
}
} // namespace
#endif // MEDIAPIPE_TFLITE_METAL_INFERENCE
// Returns the number of threads to configure the XNNPACK delegate with:
// the user-provided value if specified, otherwise the number of
// high-performance cores (hard-coded to 1 for Emscripten without the
// Threads extension).
int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) {
static constexpr int kDefaultNumThreads = -1;
if (opts.has_delegate() && opts.delegate().has_xnnpack() &&
opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
return opts.delegate().xnnpack().num_threads();
}
#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
return InferHigherCoreIds().size();
#else
return 1;
#endif // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
}
// Calculator Header Section
// Runs inference on the provided input Tensors and TFLite model.
//
// Creates an interpreter with given model and calls invoke().
// Optionally run inference on CPU/GPU.
//
// This calculator can be used with TensorConverterCalculator to get the
// appropriate inputs.
//
// When the input tensors are on CPU, gpu inference is optional and can be
// specified in the calculator options.
// When the input tensors are on GPU, inference is GPU and output can be CPU or
// GPU.
//
// Input:
// TENSORS - Vector of Tensors
//
// Output:
// TENSORS - Vector of Tensors
//
// Input side packet:
// CUSTOM_OP_RESOLVER (optional) - Use a custom op resolver,
// instead of the builtin one.
// MODEL (optional) - Use to specify TfLite model
// (std::unique_ptr<tflite::FlatBufferModel,
// std::function<void(tflite::FlatBufferModel*)>>)
//
// Example use:
// node {
// calculator: "InferenceCalculator"
// input_stream: "TENSORS:tensor_image"
// output_stream: "TENSORS:tensors"
// options: {
// [mediapipe.InferenceCalculatorOptions.ext] {
// model_path: "modelname.tflite"
// }
// }
// }
//
// or
//
// node {
// calculator: "InferenceCalculator"
// input_stream: "TENSORS:tensor_image"
// input_side_packet: "MODEL:model"
// output_stream: "TENSORS:tensors"
// options: {
// [mediapipe.InferenceCalculatorOptions.ext] {
// model_path: "modelname.tflite"
// delegate { gpu {} }
// }
// }
// }
//
// IMPORTANT Notes:
// Tensors are assumed to be ordered correctly (sequentially added to model).
// Input tensors are assumed to be of the correct size and already normalized.
class InferenceCalculator : public CalculatorBase {
public:
using TfLiteDelegatePtr =
std::unique_ptr<TfLiteDelegate, std::function<void(TfLiteDelegate*)>>;
using TfLiteModelPtr =
std::unique_ptr<tflite::FlatBufferModel,
std::function<void(tflite::FlatBufferModel*)>>;
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
private:
::mediapipe::Status ReadKernelsFromFile();
::mediapipe::Status WriteKernelsToFile();
::mediapipe::Status LoadModel(CalculatorContext* cc);
::mediapipe::StatusOr<Packet> GetModelAsPacket(const CalculatorContext& cc);
::mediapipe::Status LoadDelegate(CalculatorContext* cc);
::mediapipe::Status InitTFLiteGPURunner(CalculatorContext* cc);
Packet model_packet_;
std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteDelegatePtr delegate_;
#if MEDIAPIPE_TFLITE_GL_INFERENCE
mediapipe::GlCalculatorHelper gpu_helper_;
std::unique_ptr<tflite::gpu::TFLiteGPURunner> tflite_gpu_runner_;
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
MPPMetalHelper* gpu_helper_ = nullptr;
TFLBufferConvert* converter_to_BPHWC4_ = nil;
TFLBufferConvert* converter_from_BPHWC4_ = nil;
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
std::vector<Tensor::Shape> output_shapes_;
std::vector<std::unique_ptr<Tensor>> gpu_buffers_in_;
std::vector<std::unique_ptr<Tensor>> gpu_buffers_out_;
#endif // MEDIAPIPE_TFLITE_GPU_SUPPORTED
#if defined(MEDIAPIPE_EDGE_TPU)
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_ =
edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
#endif
bool use_advanced_gpu_api_ = false;
bool use_gpu_delegate_ = false;
bool use_kernel_caching_ = false;
std::string cached_kernel_filename_;
};
REGISTER_CALCULATOR(InferenceCalculator);
::mediapipe::Status InferenceCalculator::GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kTensorsTag));
cc->Inputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();
RET_CHECK(cc->Outputs().HasTag(kTensorsTag));
cc->Outputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();
const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
RET_CHECK(!options.model_path().empty() ^
cc->InputSidePackets().HasTag("MODEL"))
<< "Either model as side packet or model path in options is required.";
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
cc->InputSidePackets()
.Tag("CUSTOM_OP_RESOLVER")
.Set<tflite::ops::builtin::BuiltinOpResolver>();
}
if (cc->InputSidePackets().HasTag("MODEL")) {
cc->InputSidePackets().Tag("MODEL").Set<TfLiteModelPtr>();
}
if (ShouldUseGpu(options)) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#endif
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
#if MEDIAPIPE_TFLITE_GL_INFERENCE || MEDIAPIPE_TFLITE_METAL_INFERENCE
const auto& options = cc->Options<::mediapipe::InferenceCalculatorOptions>();
if (ShouldUseGpu(options)) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
use_advanced_gpu_api_ = options.has_delegate() &&
options.delegate().has_gpu() &&
options.delegate().gpu().use_advanced_gpu_api();
use_kernel_caching_ =
use_advanced_gpu_api_ && options.delegate().gpu().use_kernel_caching();
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
use_gpu_delegate_ = !use_advanced_gpu_api_;
}
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE || MEDIAPIPE_TFLITE_METAL_INFERENCE
if (use_kernel_caching_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
cached_kernel_filename_ =
"/sdcard/" + mediapipe::File::Basename(options.model_path()) + ".ker";
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID
}
// When use_advanced_gpu_api_ is set, InitTFLiteGPURunner handles all model
// loading.
if (!use_advanced_gpu_api_) {
MP_RETURN_IF_ERROR(LoadModel(cc));
}
if (use_gpu_delegate_ || use_advanced_gpu_api_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
MP_RETURN_IF_ERROR(
gpu_helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
return use_advanced_gpu_api_ ? InitTFLiteGPURunner(cc)
: LoadDelegate(cc);
}));
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(gpu_helper_);
MP_RETURN_IF_ERROR(LoadDelegate(cc));
#endif
} else {
MP_RETURN_IF_ERROR(LoadDelegate(cc));
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::Process(CalculatorContext* cc) {
if (cc->Inputs().Tag(kTensorsTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& input_tensors =
cc->Inputs().Tag(kTensorsTag).Get<std::vector<Tensor>>();
RET_CHECK(!input_tensors.empty());
auto output_tensors = absl::make_unique<std::vector<Tensor>>();
if (use_gpu_delegate_ || use_advanced_gpu_api_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
if (use_advanced_gpu_api_) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &input_tensors, &output_tensors]() -> ::mediapipe::Status {
for (int i = 0; i < input_tensors.size(); ++i) {
MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToInputTensor(
input_tensors[i].GetOpenGlBufferReadView().name(), i));
}
output_tensors->reserve(output_shapes_.size());
for (int i = 0; i < output_shapes_.size(); ++i) {
output_tensors->emplace_back(Tensor::ElementType::kFloat32,
output_shapes_[i]);
MP_RETURN_IF_ERROR(tflite_gpu_runner_->BindSSBOToOutputTensor(
output_tensors->back().GetOpenGlBufferWriteView().name(), i));
}
return ::mediapipe::OkStatus();
}));
} else {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &input_tensors]() -> ::mediapipe::Status {
// Explicitly copy input.
for (int i = 0; i < input_tensors.size(); ++i) {
glBindBuffer(GL_COPY_READ_BUFFER,
input_tensors[i].GetOpenGlBufferReadView().name());
glBindBuffer(
GL_COPY_WRITE_BUFFER,
gpu_buffers_in_[i]->GetOpenGlBufferWriteView().name());
glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0,
0, input_tensors[i].bytes());
}
return ::mediapipe::OkStatus();
}));
}
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
// Explicit copy input with conversion float 32 bits to 16 bits.
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"InferenceCalculatorConvert";
id<MTLComputeCommandEncoder> compute_encoder =
[command_buffer computeCommandEncoder];
for (int i = 0; i < input_tensors.size(); ++i) {
auto input_view = input_tensors[i].GetMtlBufferReadView(command_buffer);
// Reshape tensor.
tflite::gpu::BHWC shape = BhwcFromTensorShape(input_tensors[i].shape());
auto gpu_buffer_view =
gpu_buffers_in_[i]->GetMtlBufferWriteView(command_buffer);
[converter_to_BPHWC4_ convertWithEncoder:compute_encoder
shape:shape
sourceBuffer:input_view.buffer()
convertedBuffer:gpu_buffer_view.buffer()];
}
[compute_encoder endEncoding];
[command_buffer commit];
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
} else {
// Read CPU input into tensors.
for (int i = 0; i < input_tensors.size(); ++i) {
const Tensor* input_tensor = &input_tensors[i];
auto input_tensor_view = input_tensor->GetCpuReadView();
auto input_tensor_buffer = input_tensor_view.buffer<float>();
float* local_tensor_buffer = interpreter_->typed_input_tensor<float>(i);
std::memcpy(local_tensor_buffer, input_tensor_buffer,
input_tensor->bytes());
}
}
// Run inference.
#if MEDIAPIPE_TFLITE_GL_INFERENCE
if (use_advanced_gpu_api_) {
RET_CHECK(tflite_gpu_runner_->Invoke().ok());
} else {
RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);
}
#else
RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
if (use_gpu_delegate_ || use_advanced_gpu_api_) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
if (use_gpu_delegate_) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &output_tensors]() -> ::mediapipe::Status {
output_tensors->reserve(output_shapes_.size());
for (int i = 0; i < output_shapes_.size(); ++i) {
const auto& t = gpu_buffers_out_[i];
output_tensors->emplace_back(Tensor::ElementType::kFloat32,
gpu_buffers_out_[i]->shape());
auto read_view = t->GetOpenGlBufferReadView();
glBindBuffer(GL_COPY_READ_BUFFER, read_view.name());
auto write_view =
output_tensors->back().GetOpenGlBufferWriteView();
glBindBuffer(GL_COPY_WRITE_BUFFER, write_view.name());
glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0,
0, t->bytes());
}
return ::mediapipe::OkStatus();
}));
}
// Output tensors are already bound if use_advanced_gpu_api_ is true.
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"InferenceBPHWC4Convert";
id<MTLComputeCommandEncoder> convert_command =
[command_buffer computeCommandEncoder];
output_tensors->reserve(output_shapes_.size());
for (int i = 0; i < output_shapes_.size(); ++i) {
output_tensors->emplace_back(Tensor::ElementType::kFloat32,
output_shapes_[i]);
// Reshape tensor.
tflite::gpu::BHWC shape = BhwcFromTensorShape(output_shapes_[i]);
auto read_view =
gpu_buffers_out_[i]->GetMtlBufferReadView(command_buffer);
auto write_view =
output_tensors->at(i).GetMtlBufferWriteView(command_buffer);
[converter_from_BPHWC4_ convertWithEncoder:convert_command
shape:shape
sourceBuffer:read_view.buffer()
convertedBuffer:write_view.buffer()];
}
[convert_command endEncoding];
[command_buffer commit];
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
} else {
// Output result tensors (CPU).
const auto& tensor_indexes = interpreter_->outputs();
output_tensors->reserve(tensor_indexes.size());
for (int i = 0; i < tensor_indexes.size(); ++i) {
TfLiteTensor* tensor = interpreter_->tensor(tensor_indexes[i]);
output_tensors->emplace_back(
Tensor::ElementType::kFloat32,
Tensor::Shape{std::vector<int>{
tensor->dims->data, tensor->dims->data + tensor->dims->size}});
auto cpu_view = output_tensors->back().GetCpuWriteView();
std::memcpy(cpu_view.buffer<float>(), tensor->data.f,
output_tensors->back().bytes());
}
}
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::WriteKernelsToFile() {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
if (use_kernel_caching_) {
// Save kernel file.
auto kernel_cache = absl::make_unique<std::vector<uint8_t>>(
tflite_gpu_runner_->GetSerializedBinaryCache());
std::string cache_str(kernel_cache->begin(), kernel_cache->end());
MP_RETURN_IF_ERROR(
mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
}
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::Close(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(WriteKernelsToFile());
#if MEDIAPIPE_TFLITE_GL_INFERENCE
if (use_gpu_delegate_) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
gpu_buffers_in_.clear();
gpu_buffers_out_.clear();
return ::mediapipe::OkStatus();
}));
}
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
converter_to_BPHWC4_ = nil;
converter_from_BPHWC4_ = nil;
gpu_buffers_in_.clear();
gpu_buffers_out_.clear();
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
#if defined(MEDIAPIPE_EDGE_TPU)
edgetpu_context_.reset();
#endif
interpreter_ = nullptr;
delegate_ = nullptr;
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::ReadKernelsFromFile() {
#if MEDIAPIPE_TFLITE_GL_INFERENCE && defined(MEDIAPIPE_ANDROID)
if (use_kernel_caching_) {
// Load pre-compiled kernel file.
if (mediapipe::File::Exists(cached_kernel_filename_)) {
std::string cache_str;
MP_RETURN_IF_ERROR(
mediapipe::file::GetContents(cached_kernel_filename_, &cache_str));
std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end());
tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec));
}
}
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE && MEDIAPIPE_ANDROID
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::InitTFLiteGPURunner(
CalculatorContext* cc) {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
const auto& model = *model_packet_.Get<TfLiteModelPtr>();
tflite::ops::builtin::BuiltinOpResolver op_resolver;
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
op_resolver = cc->InputSidePackets()
.Tag("CUSTOM_OP_RESOLVER")
.Get<tflite::ops::builtin::BuiltinOpResolver>();
}
// Create runner
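  // The options below favor low latency and tune the delegate for sustained
  // (repeated) inference rather than one-shot calls.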
tflite::gpu::InferenceOptions options;
options.priority1 = tflite::gpu::InferencePriority::MIN_LATENCY;
options.priority2 = tflite::gpu::InferencePriority::AUTO;
options.priority3 = tflite::gpu::InferencePriority::AUTO;
options.usage = tflite::gpu::InferenceUsage::SUSTAINED_SPEED;
tflite_gpu_runner_ = std::make_unique<tflite::gpu::TFLiteGPURunner>(options);
MP_RETURN_IF_ERROR(
tflite_gpu_runner_->InitializeWithModel(model, op_resolver));
// Create and bind OpenGL buffers for outputs.
  // The buffers are created once and their ids are passed to the calculator's
  // outputs.
output_shapes_.resize(tflite_gpu_runner_->outputs_size());
for (int i = 0; i < tflite_gpu_runner_->outputs_size(); ++i) {
output_shapes_[i] = {tflite_gpu_runner_->GetOutputShapes()[i].b,
tflite_gpu_runner_->GetOutputShapes()[i].h,
tflite_gpu_runner_->GetOutputShapes()[i].w,
tflite_gpu_runner_->GetOutputShapes()[i].c};
}
MP_RETURN_IF_ERROR(ReadKernelsFromFile());
MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
return ::mediapipe::OkStatus();
}
::mediapipe::Status InferenceCalculator::LoadModel(CalculatorContext* cc) {
ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc));
const auto& model = *model_packet_.Get<TfLiteModelPtr>();
tflite::ops::builtin::BuiltinOpResolver op_resolver;
if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) {
op_resolver = cc->InputSidePackets()
.Tag("CUSTOM_OP_RESOLVER")
.Get<tflite::ops::builtin::BuiltinOpResolver>();
}
#if defined(MEDIAPIPE_EDGE_TPU)
interpreter_ =
BuildEdgeTpuInterpreter(model, &op_resolver, edgetpu_context_.get());
#else
tflite::InterpreterBuilder(model, op_resolver)(&interpreter_);
#endif // MEDIAPIPE_EDGE_TPU
RET_CHECK(interpreter_);
#if defined(__EMSCRIPTEN__) || defined(MEDIAPIPE_EDGE_TPU)
interpreter_->SetNumThreads(1);
#else
interpreter_->SetNumThreads(
cc->Options<mediapipe::InferenceCalculatorOptions>().cpu_num_thread());
#endif  // defined(__EMSCRIPTEN__) || defined(MEDIAPIPE_EDGE_TPU)
RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// TODO: Support quantized tensors.
CHECK(interpreter_->tensor(interpreter_->inputs()[0])->quantization.type !=
kTfLiteAffineQuantization);
return ::mediapipe::OkStatus();
}
::mediapipe::StatusOr<Packet> InferenceCalculator::GetModelAsPacket(
const CalculatorContext& cc) {
const auto& options = cc.Options<mediapipe::InferenceCalculatorOptions>();
if (!options.model_path().empty()) {
std::string model_path = options.model_path();
ASSIGN_OR_RETURN(model_path, mediapipe::PathToResourceAsFile(model_path));
auto model = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
RET_CHECK(model) << "Failed to load model from path.";
return MakePacket<TfLiteModelPtr>(TfLiteModelPtr(
model.release(), [](tflite::FlatBufferModel* model) { delete model; }));
}
if (cc.InputSidePackets().HasTag("MODEL")) {
return cc.InputSidePackets().Tag("MODEL");
}
return ::mediapipe::Status(
::mediapipe::StatusCode::kNotFound,
"Must specify TFLite model as path or loaded model.");
}
::mediapipe::Status InferenceCalculator::LoadDelegate(CalculatorContext* cc) {
const auto& calculator_opts =
cc->Options<mediapipe::InferenceCalculatorOptions>();
if (calculator_opts.has_delegate() &&
calculator_opts.delegate().has_tflite()) {
    // Default tflite inference requested - no need to modify the graph.
return ::mediapipe::OkStatus();
}
if (!use_gpu_delegate_) {
#if defined(MEDIAPIPE_ANDROID)
const bool nnapi_requested = calculator_opts.has_delegate()
? calculator_opts.delegate().has_nnapi()
: calculator_opts.use_nnapi();
if (nnapi_requested) {
// Attempt to use NNAPI.
// If not supported, the default CPU delegate will be created and used.
interpreter_->SetAllowFp16PrecisionForFp32(1);
delegate_ =
TfLiteDelegatePtr(tflite::NnApiDelegate(), [](TfLiteDelegate*) {
// No need to free according to tflite::NnApiDelegate()
// documentation.
});
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
return ::mediapipe::OkStatus();
}
#endif // MEDIAPIPE_ANDROID
#if defined(__EMSCRIPTEN__)
const bool xnnpack_requested = true;
#else
const bool xnnpack_requested = calculator_opts.has_delegate() &&
calculator_opts.delegate().has_xnnpack();
#endif // __EMSCRIPTEN__
#if !defined(MEDIAPIPE_EDGE_TPU)
if (xnnpack_requested) {
TfLiteXNNPackDelegateOptions xnnpack_opts{};
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
&TfLiteXNNPackDelegateDelete);
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
}
#endif  // !defined(MEDIAPIPE_EDGE_TPU)
    // Return; no need for the GPU delegate below.
return ::mediapipe::OkStatus();
} else {
#if MEDIAPIPE_TFLITE_GL_INFERENCE
// Configure and create the delegate.
TfLiteGpuDelegateOptions options = TfLiteGpuDelegateOptionsDefault();
options.compile_options.precision_loss_allowed = 1;
options.compile_options.preferred_gl_object_type =
TFLITE_GL_OBJECT_TYPE_FASTEST;
options.compile_options.dynamic_batch_enabled = 0;
options.compile_options.inline_parameters = 1;
delegate_ = TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options),
&TfLiteGpuDelegateDelete);
// Get input image sizes.
const auto& input_indices = interpreter_->inputs();
for (int i = 0; i < input_indices.size(); ++i) {
const TfLiteTensor* tensor = interpreter_->tensor(input_indices[i]);
gpu_buffers_in_.emplace_back(absl::make_unique<Tensor>(
Tensor::ElementType::kFloat32,
Tensor::Shape{std::vector<int>{
tensor->dims->data, tensor->dims->data + tensor->dims->size}}));
RET_CHECK_EQ(
TfLiteGpuDelegateBindBufferToTensor(
delegate_.get(),
gpu_buffers_in_.back()->GetOpenGlBufferWriteView().name(),
interpreter_->inputs()[i]),
kTfLiteOk);
}
interpreter_->SetAllowBufferHandleOutput(true);
// Get output image sizes.
const auto& output_indices = interpreter_->outputs();
output_shapes_.resize(output_indices.size());
// Create and bind output buffers.
for (int i = 0; i < output_shapes_.size(); ++i) {
const TfLiteTensor* tensor = interpreter_->tensor(output_indices[i]);
gpu_buffers_out_.emplace_back(absl::make_unique<Tensor>(
Tensor::ElementType::kFloat32,
Tensor::Shape{std::vector<int>{
tensor->dims->data, tensor->dims->data + tensor->dims->size}}));
RET_CHECK_EQ(
TfLiteGpuDelegateBindBufferToTensor(
delegate_.get(),
gpu_buffers_out_.back()->GetOpenGlBufferWriteView().name(),
output_indices[i]),
kTfLiteOk);
}
// Must call this last.
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
#elif MEDIAPIPE_TFLITE_METAL_INFERENCE
// Configure and create the delegate.
TFLGpuDelegateOptions options;
options.allow_precision_loss = true;
options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive;
delegate_ = TfLiteDelegatePtr(TFLGpuDelegateCreate(&options),
&TFLGpuDelegateDelete);
RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
id<MTLDevice> device = gpu_helper_.mtlDevice;
// Get input image sizes.
const auto& input_indices = interpreter_->inputs();
for (int i = 0; i < input_indices.size(); ++i) {
const TfLiteTensor* tensor = interpreter_->tensor(input_indices[i]);
// Create and bind input buffer.
std::vector<int> dims{tensor->dims->data,
tensor->dims->data + tensor->dims->size};
dims.back() = RoundUp(dims.back(), 4);
gpu_buffers_in_.emplace_back(absl::make_unique<Tensor>(
Tensor::ElementType::kFloat16, Tensor::Shape{dims}));
auto buffer_view =
gpu_buffers_in_[i]->GetMtlBufferWriteView(gpu_helper_.mtlDevice);
RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
delegate_.get(), input_indices[i], buffer_view.buffer()),
true);
}
interpreter_->SetAllowBufferHandleOutput(true);
// Get output image sizes.
const auto& output_indices = interpreter_->outputs();
output_shapes_.resize(output_indices.size());
for (int i = 0; i < output_shapes_.size(); ++i) {
const TfLiteTensor* tensor = interpreter_->tensor(output_indices[i]);
RET_CHECK(tensor->dims->size <= 4);
// Create and bind output buffers.
// Channels are always padded to multiple of 4.
std::vector<int> dims{tensor->dims->data,
tensor->dims->data + tensor->dims->size};
output_shapes_[i] = {dims};
dims.back() = RoundUp(dims.back(), 4);
gpu_buffers_out_.emplace_back(absl::make_unique<Tensor>(
Tensor::ElementType::kFloat16, Tensor::Shape{dims}));
RET_CHECK_EQ(TFLGpuDelegateBindMetalBufferToTensor(
delegate_.get(), output_indices[i],
gpu_buffers_out_[i]
->GetMtlBufferWriteView(gpu_helper_.mtlDevice)
.buffer()),
true);
}
// Create converter for GPU input.
converter_to_BPHWC4_ = [[TFLBufferConvert alloc] initWithDevice:device
isFloat16:true
convertToPBHWC4:true];
if (converter_to_BPHWC4_ == nil) {
return mediapipe::InternalError(
"Error initializating input buffer converter");
}
// Create converter for GPU output.
converter_from_BPHWC4_ = [[TFLBufferConvert alloc] initWithDevice:device
isFloat16:true
convertToPBHWC4:false];
if (converter_from_BPHWC4_ == nil) {
return mediapipe::InternalError(
"Error initializating output buffer converter");
}
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
}
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,111 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
// Full Example:
//
// node {
// calculator: "InferenceCalculator"
// input_stream: "TENSOR_IN:image_tensors"
// output_stream: "TENSOR_OUT:result_tensors"
// options {
// [mediapipe.InferenceCalculatorOptions.ext] {
// model_path: "model.tflite"
// delegate { gpu {} }
// }
// }
// }
//
message InferenceCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional InferenceCalculatorOptions ext = 336783863;
}
message Delegate {
// Default inference provided by tflite.
message TfLite {}
    // Delegate to run GPU inference.
    // (Uses OpenGL, OpenCL, or Metal depending on the device.)
message Gpu {
// Experimental, Android/Linux only. Use TFLite GPU delegate API2 for
// the NN inference.
// example:
// delegate: { gpu { use_advanced_gpu_api: true } }
optional bool use_advanced_gpu_api = 1 [default = false];
      // This option is valid for the TFLite GPU delegate API2 only.
      // Choose any of the available APIs to force inference to use it.
enum API {
ANY = 0;
OPENGL = 1;
OPENCL = 2;
}
optional API api = 4 [default = ANY];
      // This option is valid for the TFLite GPU delegate API2 only.
      // Set to true to use 16-bit float precision. If maximum precision is
      // needed, set to false to use 32-bit float calculations only.
optional bool allow_precision_loss = 3 [default = true];
// Load pre-compiled serialized binary cache to accelerate init process.
// Only available for OpenCL delegate on Android.
optional bool use_kernel_caching = 2 [default = false];
}
// Android only.
message Nnapi {}
message Xnnpack {
// Number of threads for XNNPACK delegate. (By default, calculator tries
// to choose optimal number of threads depending on the device.)
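    // Example: delegate { xnnpack { num_threads: 10 } }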
optional int32 num_threads = 1 [default = -1];
}
oneof delegate {
TfLite tflite = 1;
Gpu gpu = 2;
Nnapi nnapi = 3;
Xnnpack xnnpack = 4;
}
}
// Path to the TF Lite model (ex: /path/to/modelname.tflite).
// On mobile, this is generally just modelname.tflite.
optional string model_path = 1;
// Whether the TF Lite GPU or CPU backend should be used. Effective only when
// input tensors are on CPU. For input tensors on GPU, GPU backend is always
// used.
// DEPRECATED: configure "delegate" instead.
optional bool use_gpu = 2 [deprecated = true, default = false];
// Android only. When true, an NNAPI delegate will be used for inference.
// If NNAPI is not available, then the default CPU delegate will be used
// automatically.
// DEPRECATED: configure "delegate" instead.
optional bool use_nnapi = 3 [deprecated = true, default = false];
// The number of threads available to the interpreter. Effective only when
// input tensors are on CPU and 'use_gpu' is false.
optional int32 cpu_num_thread = 4 [default = -1];
// TfLite delegate to run inference.
// NOTE: calculator is free to choose delegate if not specified explicitly.
  // NOTE: use_gpu/use_nnapi are ignored if a delegate is specified. (The
  // delegate takes precedence over the deprecated use_* options.)
optional Delegate delegate = 5;
}

View File

@ -0,0 +1,162 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/str_replace.h"
#include "absl/strings/string_view.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h" // NOLINT
#include "mediapipe/framework/tool/validate_type.h"
#include "tensorflow/lite/error_reporter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#ifdef __APPLE__
#include <CoreFoundation/CoreFoundation.h>
#endif // defined(__APPLE__)
namespace mediapipe {
using ::tflite::Interpreter;
void DoSmokeTest(const std::string& graph_proto) {
const int width = 8;
const int height = 8;
const int channels = 3;
// Prepare input tensor.
auto input_vec = absl::make_unique<std::vector<Tensor>>();
input_vec->emplace_back(Tensor::ElementType::kFloat32,
Tensor::Shape{1, height, width, channels});
{
auto view1 = input_vec->back().GetCpuWriteView();
auto tensor_buffer = view1.buffer<float>();
ASSERT_NE(tensor_buffer, nullptr);
for (int i = 0; i < width * height * channels - 1; i++) {
tensor_buffer[i] = 1;
}
}
  // Prepare a single-calculator graph and wait for its output packets.
CalculatorGraphConfig graph_config =
ParseTextProtoOrDie<CalculatorGraphConfig>(graph_proto);
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor_out", &graph_config, &output_packets);
CalculatorGraph graph(graph_config);
MP_ASSERT_OK(graph.StartRun({}));
// Push the tensor into the graph.
MP_ASSERT_OK(graph.AddPacketToInputStream(
"tensor_in", Adopt(input_vec.release()).At(Timestamp(0))));
  // Wait until the calculator is done processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_EQ(1, output_packets.size());
// Get and process results.
const std::vector<Tensor>& result_vec =
output_packets[0].Get<std::vector<Tensor>>();
ASSERT_EQ(1, result_vec.size());
const Tensor& result = result_vec[0];
auto view = result.GetCpuReadView();
auto result_buffer = view.buffer<float>();
ASSERT_NE(result_buffer, nullptr);
for (int i = 0; i < width * height * channels - 1; i++) {
ASSERT_EQ(3, result_buffer[i]);
}
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("tensor_in"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
// Tests a simple add model; as asserted below, the test model triples each
// input value.
TEST(InferenceCalculatorTest, SmokeTest) {
std::string graph_proto = R"(
input_stream: "tensor_in"
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:tensor_in"
output_stream: "TENSORS:tensor_out"
options {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/calculators/tensor/testdata/add.bin"
$delegate
}
}
}
)";
// Test CPU inference only.
DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll(
graph_proto, {{"$delegate", "delegate { tflite {} }"}}));
DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll(
graph_proto, {{"$delegate", "delegate { xnnpack {} }"}}));
DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll(
graph_proto,
{{"$delegate", "delegate { xnnpack { num_threads: 10 } }"}}));
}
TEST(InferenceCalculatorTest, SmokeTest_ModelAsInputSidePacket) {
std::string graph_proto = R"(
input_stream: "tensor_in"
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:model_path"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet { string_value: "mediapipe/calculators/tensor/testdata/add.bin" }
}
}
}
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:model_path"
output_side_packet: "CONTENTS:model_blob"
}
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:model_blob"
output_side_packet: "MODEL:model"
}
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:tensor_in"
output_stream: "TENSORS:tensor_out"
input_side_packet: "MODEL:model"
options {
[mediapipe.InferenceCalculatorOptions.ext] {
delegate { tflite {} }
}
}
}
)";
DoSmokeTest(graph_proto);
}
} // namespace mediapipe

View File

@ -0,0 +1,676 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/resource_util.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
#if MEDIAPIPE_METAL_ENABLED
#import <CoreVideo/CoreVideo.h>
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>
#import "mediapipe/gpu/MPPMetalHelper.h"
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include "mediapipe/gpu/gl_calculator_helper.h"
#if MEDIAPIPE_OPENGL_ES_VERSION < MEDIAPIPE_OPENGL_ES_31
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/shader_util.h"
#endif // MEDIAPIPE_OPENGL_ES_VERSION < MEDIAPIPE_OPENGL_ES_31
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
namespace {
constexpr int kWorkgroupSize = 8; // Block size for GPU shader.
// Commonly used to compute the number of blocks to launch in a kernel.
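// This is integer ceiling division, e.g. NumGroups(100, 8) == 13.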
int NumGroups(const int size, const int group_size) { // NOLINT
return (size + group_size - 1) / group_size;
}
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
RowMajorMatrixXf;
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
ColMajorMatrixXf;
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kTensorsTag[] = "TENSORS";
constexpr char kMatrixTag[] = "MATRIX";
} // namespace
namespace mediapipe {
// Calculator for normalizing and converting an ImageFrame, GpuBuffer or Matrix
// into a Tensor.
//
// This calculator is designed to be used with the InferenceCalculator,
// as a pre-processing step for calculator inputs.
//
// IMAGE and IMAGE_GPU inputs are normalized to [-1,1] (default) or [0,1],
// specified by options (unless outputting a quantized tensor).
//
// Input:
// One of the following tags:
// IMAGE - ImageFrame (assumed to be 8-bit or 32-bit data).
// IMAGE_GPU - GpuBuffer (assumed to be RGBA or RGB GL texture).
// MATRIX - Matrix.
//
// Output:
// One of the following tags:
// TENSORS - Vector of Tensors of type kFloat32. The resource type used:
// - MTLBuffer if Metal API is available
// - SSBO if Metal is unavailable and OpenGL ES 3.1 is available
// - Texture2D if Metal and GLES 3.1 are not available and GLES 3.0 is.
//
// Example use:
// node {
// calculator: "TensorConverterCalculator"
// input_stream: "IMAGE:input_image"
// output_stream: "TENSORS:image_tensor"
// options: {
// [mediapipe.TensorConverterCalculatorOptions.ext] {
// zero_center: true
// }
// }
// }
//
// IMPORTANT Notes:
// GPU tensors are currently only supported on mobile platforms.
class TensorConverterCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
private:
::mediapipe::Status InitGpu(CalculatorContext* cc);
::mediapipe::Status LoadOptions(CalculatorContext* cc);
template <class T>
::mediapipe::Status NormalizeImage(const ImageFrame& image_frame,
bool flip_vertically, float* tensor_ptr);
::mediapipe::Status CopyMatrixToTensor(const Matrix& matrix,
float* tensor_ptr);
::mediapipe::Status ProcessCPU(CalculatorContext* cc);
::mediapipe::Status ProcessGPU(CalculatorContext* cc);
#if MEDIAPIPE_METAL_ENABLED
MPPMetalHelper* gpu_helper_ = nullptr;
id<MTLComputePipelineState> to_buffer_program_;
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
mediapipe::GlCalculatorHelper gpu_helper_;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
GLuint to_buffer_program_;
#else
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
GLuint to_tex2d_program_;
GLuint framebuffer_;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
#endif // MEDIAPIPE_METAL_ENABLED
bool initialized_ = false;
bool use_gpu_ = false;
absl::optional<std::pair<float, float>> output_range_;
bool flip_vertically_ = false;
bool row_major_matrix_ = false;
int max_num_channels_ = 3;
};
REGISTER_CALCULATOR(TensorConverterCalculator);
::mediapipe::Status TensorConverterCalculator::GetContract(
CalculatorContract* cc) {
// Confirm only one of the input streams is present.
RET_CHECK(static_cast<int>(cc->Inputs().HasTag(kImageFrameTag)) +
static_cast<int>(cc->Inputs().HasTag(kGpuBufferTag)) +
static_cast<int>(cc->Inputs().HasTag(kMatrixTag)) ==
1);
if (cc->Inputs().HasTag(kImageFrameTag)) {
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
}
if (cc->Inputs().HasTag(kMatrixTag)) {
cc->Inputs().Tag(kMatrixTag).Set<Matrix>();
}
#if !MEDIAPIPE_DISABLE_GPU
if (cc->Inputs().HasTag(kGpuBufferTag)) {
cc->Inputs().Tag(kGpuBufferTag).Set<mediapipe::GpuBuffer>();
#if MEDIAPIPE_METAL_ENABLED
MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif // MEDIAPIPE_METAL_ENABLED
}
#endif // !MEDIAPIPE_DISABLE_GPU
RET_CHECK(cc->Outputs().HasTag(kTensorsTag));
cc->Outputs().Tag(kTensorsTag).Set<std::vector<Tensor>>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
MP_RETURN_IF_ERROR(LoadOptions(cc));
#if !MEDIAPIPE_DISABLE_GPU
if (cc->Inputs().HasTag(kGpuBufferTag)) {
use_gpu_ = true;
#if MEDIAPIPE_METAL_ENABLED
gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc];
RET_CHECK(gpu_helper_);
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif // MEDIAPIPE_METAL_ENABLED
}
#endif // !MEDIAPIPE_DISABLE_GPU
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::Process(CalculatorContext* cc) {
if (use_gpu_) {
if (cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
// Convert to GPU tensors type.
MP_RETURN_IF_ERROR(ProcessGPU(cc));
} else {
// Convert to CPU tensors or Matrix type.
MP_RETURN_IF_ERROR(ProcessCPU(cc));
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
if (use_gpu_) {
#if MEDIAPIPE_METAL_ENABLED
to_buffer_program_ = nil;
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
gpu_helper_.RunInGlContext([this] {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
glDeleteProgram(to_buffer_program_);
#else
glDeleteFramebuffers(1, &framebuffer_);
glDeleteProgram(to_tex2d_program_);
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
});
#endif // MEDIAPIPE_METAL_ENABLED
}
#endif // !MEDIAPIPE_DISABLE_GPU
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::ProcessCPU(
CalculatorContext* cc) {
auto output_tensors = absl::make_unique<std::vector<Tensor>>();
if (cc->Inputs().HasTag(kImageFrameTag)) {
if (cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& image_frame =
cc->Inputs().Tag(kImageFrameTag).Get<ImageFrame>();
const int height = image_frame.Height();
const int width = image_frame.Width();
const int channels = image_frame.NumberOfChannels();
const int channels_preserved = std::min(channels, max_num_channels_);
const mediapipe::ImageFormat::Format format = image_frame.Format();
if (!(format == mediapipe::ImageFormat::SRGBA ||
format == mediapipe::ImageFormat::SRGB ||
format == mediapipe::ImageFormat::GRAY8 ||
format == mediapipe::ImageFormat::VEC32F1))
RET_CHECK_FAIL() << "Unsupported CPU input format.";
output_tensors->emplace_back(
Tensor::ElementType::kFloat32,
Tensor::Shape{1, height, width, channels_preserved});
auto cpu_view = output_tensors->back().GetCpuWriteView();
// Copy image data into tensor.
if (image_frame.ByteDepth() == 1) {
MP_RETURN_IF_ERROR(NormalizeImage<uint8>(image_frame, flip_vertically_,
cpu_view.buffer<float>()));
} else if (image_frame.ByteDepth() == 4) {
MP_RETURN_IF_ERROR(NormalizeImage<float>(image_frame, flip_vertically_,
cpu_view.buffer<float>()));
} else {
return ::mediapipe::InternalError(
"Only byte-based (8 bit) and float (32 bit) images supported.");
}
} else if (cc->Inputs().HasTag(kMatrixTag)) {
if (cc->Inputs().Tag(kMatrixTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& matrix = cc->Inputs().Tag(kMatrixTag).Get<Matrix>();
const int height = matrix.rows();
const int width = matrix.cols();
const int channels = 1;
output_tensors->emplace_back(Tensor::ElementType::kFloat32,
Tensor::Shape{1, height, width, channels});
MP_RETURN_IF_ERROR(CopyMatrixToTensor(
matrix, output_tensors->back().GetCpuWriteView().buffer<float>()));
} else {
return ::mediapipe::OkStatus();
}
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::ProcessGPU(
CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
if (!initialized_) {
MP_RETURN_IF_ERROR(InitGpu(cc));
initialized_ = true;
}
const auto& input =
cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
int width = input.width();
int height = input.height();
int channels = max_num_channels_;
auto output_tensors = absl::make_unique<std::vector<Tensor>>();
output_tensors->emplace_back(Tensor::ElementType::kFloat32,
Tensor::Shape{1, height, width, channels});
#if MEDIAPIPE_METAL_ENABLED
id<MTLDevice> device = gpu_helper_.mtlDevice;
id<MTLCommandBuffer> command_buffer = [gpu_helper_ commandBuffer];
command_buffer.label = @"TensorConverterCalculatorConvert";
id<MTLComputeCommandEncoder> compute_encoder =
[command_buffer computeCommandEncoder];
[compute_encoder setComputePipelineState:to_buffer_program_];
id<MTLTexture> src_texture = [gpu_helper_ metalTextureWithGpuBuffer:input];
[compute_encoder setTexture:src_texture atIndex:0];
auto output_view =
output_tensors->at(0).GetMtlBufferWriteView(command_buffer);
[compute_encoder setBuffer:output_view.buffer() offset:0 atIndex:1];
MTLSize threads_per_group = MTLSizeMake(kWorkgroupSize, kWorkgroupSize, 1);
MTLSize threadgroups =
MTLSizeMake(NumGroups(input.width(), kWorkgroupSize),
NumGroups(input.height(), kWorkgroupSize), 1);
[compute_encoder dispatchThreadgroups:threadgroups
threadsPerThreadgroup:threads_per_group];
[compute_encoder endEncoding];
[command_buffer commit];
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext(
[this, &output_tensors, &input]() -> ::mediapipe::Status {
auto src = gpu_helper_.CreateSourceTexture(input);
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
// Convert GL texture into SSBO.
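        // The compute shader reads the bound texture and writes normalized
        // float values into the tensor's SSBO, one invocation per pixel.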
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, src.name());
auto output_view = output_tensors->back().GetOpenGlBufferWriteView();
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, output_view.name());
glUseProgram(to_buffer_program_);
glDispatchCompute(NumGroups(input.width(), kWorkgroupSize),
NumGroups(input.height(), kWorkgroupSize), 1);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
glBindTexture(GL_TEXTURE_2D, 0);
#else
// Texture2D -> Texture2D with OpenGL ES 3.0.
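        // Render the source texture into the tensor-backed texture with a
        // full-screen quad; the fragment shader applies the normalization.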
glUseProgram(to_tex2d_program_);
glDisable(GL_DEPTH_TEST);
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_);
glViewport(0, 0, src.width(), src.height());
glActiveTexture(GL_TEXTURE0);
auto output_view = output_tensors->back().GetOpenGlTexture2dWriteView();
glBindTexture(GL_TEXTURE_2D, output_view.name());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D, output_view.name(), 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(src.target(), src.name());
glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0,
mediapipe::kBasicSquareVertices);
glEnableVertexAttribArray(ATTRIB_VERTEX);
glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
mediapipe::kBasicTextureVertices);
glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
// draw
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// cleanup
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
src.Release();
return ::mediapipe::OkStatus();
}));
#endif // MEDIAPIPE_METAL_ENABLED
cc->Outputs()
.Tag(kTensorsTag)
.Add(output_tensors.release(), cc->InputTimestamp());
#else
RET_CHECK_FAIL() << "GPU processing is not enabled.";
#endif // !MEDIAPIPE_DISABLE_GPU
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
// Get input image sizes.
const auto& input =
cc->Inputs().Tag(kGpuBufferTag).Get<mediapipe::GpuBuffer>();
mediapipe::ImageFormat::Format format =
mediapipe::ImageFormatForGpuBufferFormat(input.format());
const bool include_alpha = (max_num_channels_ == 4);
const bool single_channel = (max_num_channels_ == 1);
if (!(format == mediapipe::ImageFormat::GRAY8 ||
format == mediapipe::ImageFormat::SRGB ||
format == mediapipe::ImageFormat::SRGBA))
RET_CHECK_FAIL() << "Unsupported GPU input format.";
if (include_alpha && (format != mediapipe::ImageFormat::SRGBA))
RET_CHECK_FAIL() << "Num input channels is less than desired output.";
#if MEDIAPIPE_METAL_ENABLED
id<MTLDevice> device = gpu_helper_.mtlDevice;
// Shader to convert GL Texture to Metal Buffer,
// with normalization to either: [0,1] or [-1,1].
const std::string shader_source = absl::Substitute(
R"(
#include <metal_stdlib>
using namespace metal;
kernel void convertKernel(
texture2d<half, access::sample> in_tex [[ texture(0) ]],
device float* out_buf [[ buffer(1) ]],
uint2 gid [[ thread_position_in_grid ]]) {
if (gid.x >= in_tex.get_width() || gid.y >= in_tex.get_height()) return;
constexpr sampler texture_sampler(coord::pixel, address::clamp_to_edge);
const float2 coord = float2(gid.x, gid.y);
half4 pixel = in_tex.sample(texture_sampler, coord);
$0 // normalize [-1,1]
const int linear_index = $1 * ($2 * in_tex.get_width() + gid.x);
out_buf[linear_index + 0] = pixel.x;
$3 // g & b channels
$4 // alpha channel
}
)",
/*$0=*/
output_range_.has_value()
? absl::Substitute("pixel = pixel * half($0) + half($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$1=*/max_num_channels_,
/*$2=*/flip_vertically_ ? "(in_tex.get_height() - 1 - gid.y)" : "gid.y",
/*$3=*/
single_channel ? "" : R"(out_buf[linear_index + 1] = pixel.y;
out_buf[linear_index + 2] = pixel.z;)",
/*$4=*/include_alpha ? "out_buf[linear_index + 3] = pixel.w;" : "");
NSString* library_source =
[NSString stringWithUTF8String:shader_source.c_str()];
NSError* error = nil;
id<MTLLibrary> library =
[device newLibraryWithSource:library_source options:nullptr error:&error];
RET_CHECK(library != nil) << "Couldn't create shader library "
<< [[error localizedDescription] UTF8String];
id<MTLFunction> kernel_func = nil;
kernel_func = [library newFunctionWithName:@"convertKernel"];
RET_CHECK(kernel_func != nil) << "Couldn't create kernel function.";
to_buffer_program_ =
[device newComputePipelineStateWithFunction:kernel_func error:&error];
RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
[[error localizedDescription] UTF8String];
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &include_alpha,
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
&input,
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
&single_channel]()
-> ::mediapipe::Status {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
// Shader to convert GL Texture to Shader Storage Buffer Object (SSBO),
// with normalization to either: [0,1] or [-1,1].
const std::string shader_source = absl::Substitute(
R"( #version 310 es
layout(local_size_x = $0, local_size_y = $0) in;
layout(binding = 0) uniform sampler2D input_texture;
layout(std430, binding = 1) buffer Output {float elements[];} output_data;
ivec2 width_height = ivec2($1, $2);
void main() {
ivec2 gid = ivec2(gl_GlobalInvocationID.xy);
if (gid.x >= width_height.x || gid.y >= width_height.y) return;
vec4 pixel = texelFetch(input_texture, gid, 0);
$3 // normalize [-1,1]
int linear_index = $7 * ($4 * width_height.x + gid.x);
output_data.elements[linear_index + 0] = pixel.x; // r channel
$5 // g & b channels
$6 // alpha channel
})",
/*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(),
/*$3=*/
output_range_.has_value()
? absl::Substitute("pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y",
/*$5=*/
single_channel ? ""
: R"(output_data.elements[linear_index + 1] = pixel.y;
output_data.elements[linear_index + 2] = pixel.z;)",
/*$6=*/
include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;"
: "",
/*$7=*/max_num_channels_);
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
const GLchar* sources[] = {shader_source.c_str()};
glShaderSource(shader, 1, sources, NULL);
glCompileShader(shader);
GLint compiled = GL_FALSE;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
RET_CHECK(compiled == GL_TRUE);
to_buffer_program_ = glCreateProgram();
glAttachShader(to_buffer_program_, shader);
glDeleteShader(shader);
glLinkProgram(to_buffer_program_);
#else
// OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion.
const std::string shader_source = absl::Substitute(
R"(
#if __VERSION__ < 130
#define in varying
#endif // __VERSION__ < 130
#ifdef GL_ES
#define fragColor gl_FragColor
precision highp float;
#else
#define lowp
#define mediump
#define highp
#define texture2D texture
out $0 fragColor;
#endif // defined(GL_ES)
in vec2 sample_coordinate;
uniform sampler2D frame;
void main() {
$1 // flip
vec4 pixel = texture2D(frame, sample_coordinate);
$2 // normalize [-1,1]
fragColor.r = pixel.r; // r channel
$3 // g & b channels
$4 // alpha channel
})",
/*$0=*/single_channel ? "vec1" : "vec4",
/*$1=*/
flip_vertically_ ? "sample_coordinate.y = 1.0 - sample_coordinate.y;"
: "",
/*$2=*/output_range_.has_value()
? absl::Substitute("pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$3=*/single_channel ? "" : R"(fragColor.g = pixel.g;
fragColor.b = pixel.b;)",
/*$4=*/
include_alpha ? "fragColor.a = pixel.a;"
: (single_channel ? "" : "fragColor.a = 1.0;"));
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
};
const GLchar* attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
};
// shader program and params
mediapipe::GlhCreateProgram(
mediapipe::kBasicVertexShader, shader_source.c_str(), NUM_ATTRIBUTES,
&attr_name[0], attr_location, &to_tex2d_program_);
RET_CHECK(to_tex2d_program_) << "Problem initializing the program.";
glUseProgram(to_tex2d_program_);
glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1);
glGenFramebuffers(1, &framebuffer_);
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
return ::mediapipe::OkStatus();
}));
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif // !MEDIAPIPE_DISABLE_GPU
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::LoadOptions(
CalculatorContext* cc) {
// Get calculator options specified in the graph.
const auto& options =
cc->Options<::mediapipe::TensorConverterCalculatorOptions>();
  // If zero_center is set, use an output float range of [-1, 1], as specified
  // in the calculator proto.
if (options.zero_center()) {
output_range_.emplace(std::pair<float, float>(-1.0, 1.0));
}
// Custom output_tensor_float_range values.
// If the float range is specified in pb text, use the specified values
// instead.
if (options.has_output_tensor_float_range()) {
output_range_.emplace(options.output_tensor_float_range().min(),
options.output_tensor_float_range().max());
CHECK_GT(output_range_->second, output_range_->first);
}
// Custom div and sub values.
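  // normalized_value = input / custom_div - custom_sub, expressed here as an
  // output range over inputs in [0, 255]: input 0 maps to -custom_sub and
  // input 255 maps to 255 / custom_div - custom_sub. For example, custom_div
  // 2.0 and custom_sub 33.0 map a pixel value of 200 to 200 / 2.0 - 33.0 = 67.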
if (options.use_custom_normalization()) {
output_range_.emplace(std::pair<float, float>(
-options.custom_sub(),
-options.custom_sub() + 255.0 / options.custom_div()));
}
// Get y-flip mode.
flip_vertically_ = options.flip_vertically();
// Get row_major_matrix mode.
row_major_matrix_ = options.row_major_matrix();
// Get desired way to handle input channels.
max_num_channels_ = options.max_num_channels();
CHECK_GE(max_num_channels_, 1);
CHECK_LE(max_num_channels_, 4);
CHECK_NE(max_num_channels_, 2);
return ::mediapipe::OkStatus();
}
template <class T>
::mediapipe::Status TensorConverterCalculator::NormalizeImage(
const ImageFrame& image_frame, bool flip_vertically, float* tensor_ptr) {
const int height = image_frame.Height();
const int width = image_frame.Width();
const int channels = image_frame.NumberOfChannels();
const int channels_preserved = std::min(channels, max_num_channels_);
const int channels_ignored = channels - channels_preserved;
if (output_range_.has_value()) {
    // If an output float range is set (via zero_center, custom normalization,
    // or output_tensor_float_range), normalize the pixel values from [0, 255]
    // to the specified output range.
RET_CHECK_NE(output_range_->first, output_range_->second);
const float scale = (output_range_->second - output_range_->first) / 255.0f;
const float bias = output_range_->first;
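    // For example, an output range of [-1, 1] gives scale = 2 / 255 and
    // bias = -1, so pixel 0 maps to -1 and pixel 255 maps to 1.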
for (int i = 0; i < height; ++i) {
const T* image_ptr = reinterpret_cast<const T*>(
image_frame.PixelData() +
(flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
for (int j = 0; j < width; ++j) {
for (int c = 0; c < channels_preserved; ++c) {
*tensor_ptr++ = *image_ptr++ * scale + bias;
}
image_ptr += channels_ignored;
}
}
} else {
// [0,1], scale only (bias == 0)
    // Verified that there are no precision issues with the 1.0f / 255.0f
    // expression.
const float scale = 1.0f / 255.0f;
for (int i = 0; i < height; ++i) {
const T* image_ptr = reinterpret_cast<const T*>(
image_frame.PixelData() +
(flip_vertically ? height - 1 - i : i) * image_frame.WidthStep());
for (int j = 0; j < width; ++j) {
for (int c = 0; c < channels_preserved; ++c) {
*tensor_ptr++ = *image_ptr++ * scale;
}
image_ptr += channels_ignored;
}
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorConverterCalculator::CopyMatrixToTensor(
const Matrix& matrix, float* tensor_ptr) {
if (row_major_matrix_) {
auto matrix_map =
Eigen::Map<RowMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
matrix_map = matrix;
} else {
auto matrix_map =
Eigen::Map<ColMajorMatrixXf>(tensor_ptr, matrix.rows(), matrix.cols());
matrix_map = matrix;
}
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,69 @@
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
// Full Example:
//
// node {
// calculator: "TensorConverterCalculator"
// input_stream: "IMAGE_IN:input_image"
// output_stream: "TENSOR_OUT:image_tensor"
// options {
// [mediapipe.TensorConverterCalculatorOptions.ext] {
// zero_center: true
// }
// }
// }
//
message TensorConverterCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional TensorConverterCalculatorOptions ext = 335742637;
}
// Choose normalization mode for output (not applied for Matrix inputs).
// true = [-1,1]
// false = [0,1]
// Ignored if using quantization.
optional bool zero_center = 1 [default = true];
// Custom settings to override the internal scaling factors `div` and `sub`.
// Both values must be set to non-negative values. Will only take effect on
// CPU AND when |use_custom_normalization| is set to true. When these custom
  // values take effect, the |zero_center| setting above will be overridden, and
// the normalized_value will be calculated as:
// normalized_value = input / custom_div - custom_sub.
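  // For example, custom_div: 2.0 and custom_sub: 33.0 map an input value of
  // 200 to 200 / 2.0 - 33.0 = 67.0.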
optional bool use_custom_normalization = 6 [default = false];
optional float custom_div = 7 [default = -1.0];
optional float custom_sub = 8 [default = -1.0];
// Whether the input image should be flipped vertically (along the
// y-direction). This is useful, for example, when the input image is defined
// with a coordinate system where the origin is at the bottom-left corner
// (e.g., in OpenGL) whereas the ML model expects an image with a top-left
// origin.
optional bool flip_vertically = 2 [default = false];
// Controls how many channels of the input image get passed through to the
// tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
optional int32 max_num_channels = 3 [default = 3];
// The calculator expects Matrix inputs to be in column-major order. Set
// row_major_matrix to true if the inputs are in row-major order.
optional bool row_major_matrix = 4 [default = false];
// Quantization option (CPU only).
// When true, output kUint8 tensor instead of kFloat32.
optional bool use_quantized_tensors = 5 [default = false];
// Normalization option.
  // Setting output_tensor_float_range results in the values being normalized to
// the range [output_tensor_float_range.min, output_tensor_float_range.max].
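  // A uint8 input value v is mapped to min + v * (max - min) / 255.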
optional TensorFloatRange output_tensor_float_range = 9;
message TensorFloatRange {
optional float min = 1;
optional float max = 2;
}
}

View File

@ -0,0 +1,323 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <random>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h" // NOLINT
#include "mediapipe/framework/tool/validate_type.h"
namespace mediapipe {
namespace {
constexpr char kTransposeOptionsString[] =
"[mediapipe.TensorConverterCalculatorOptions.ext]: {"
"row_major_matrix: True}";
} // namespace
using RandomEngine = std::mt19937_64;
using testing::Eq;
const uint32 kSeed = 1234;
const int kNumSizes = 8;
const int sizes[kNumSizes][2] = {{1, 1}, {12, 1}, {1, 9}, {2, 2},
{5, 3}, {7, 13}, {16, 32}, {101, 2}};
class TensorConverterCalculatorTest : public ::testing::Test {
protected:
// Adds a packet with a matrix filled with random values in [0,1].
void AddRandomMatrix(int num_rows, int num_columns, uint32 seed,
bool row_major_matrix = false) {
RandomEngine random(kSeed);
std::uniform_real_distribution<> uniform_dist(0, 1.0);
auto matrix = ::absl::make_unique<Matrix>();
matrix->resize(num_rows, num_columns);
if (row_major_matrix) {
for (int y = 0; y < num_rows; ++y) {
for (int x = 0; x < num_columns; ++x) {
float value = uniform_dist(random);
(*matrix)(y, x) = value;
}
}
} else {
for (int x = 0; x < num_columns; ++x) {
for (int y = 0; y < num_rows; ++y) {
float value = uniform_dist(random);
(*matrix)(y, x) = value;
}
}
}
MP_ASSERT_OK(graph_->AddPacketToInputStream(
"matrix", Adopt(matrix.release()).At(Timestamp(0))));
}
std::unique_ptr<CalculatorGraph> graph_;
};
TEST_F(TensorConverterCalculatorTest, RandomMatrixColMajor) {
for (int size_index = 0; size_index < kNumSizes; ++size_index) {
const int num_rows = sizes[size_index][0];
const int num_columns = sizes[size_index][1];
// Run the calculator and verify that one output is generated.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "matrix"
node {
calculator: "TensorConverterCalculator"
input_stream: "MATRIX:matrix"
output_stream: "TENSORS:tensor"
options {
[mediapipe.TensorConverterCalculatorOptions.ext] {
row_major_matrix: false
}
}
}
)");
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
graph_ = absl::make_unique<CalculatorGraph>();
MP_ASSERT_OK(graph_->Initialize(graph_config));
MP_ASSERT_OK(graph_->StartRun({}));
// Push the tensor into the graph.
AddRandomMatrix(num_rows, num_columns, kSeed, /*row_major_matrix=*/false);
    // Wait until the calculator is done processing.
MP_ASSERT_OK(graph_->WaitUntilIdle());
EXPECT_EQ(1, output_packets.size());
// Get and process results.
const std::vector<Tensor>& tensor_vec =
output_packets[0].Get<std::vector<Tensor>>();
EXPECT_EQ(1, tensor_vec.size());
const Tensor* tensor = &tensor_vec[0];
EXPECT_EQ(Tensor::ElementType::kFloat32, tensor->element_type());
// Verify that the data is correct.
RandomEngine random(kSeed);
std::uniform_real_distribution<> uniform_dist(0, 1.0);
auto view = tensor->GetCpuReadView();
auto tensor_buffer = view.buffer<float>();
for (int i = 0; i < num_rows * num_columns; ++i) {
const float expected = uniform_dist(random);
EXPECT_EQ(expected, tensor_buffer[i]) << "at i = " << i;
}
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph_->CloseInputStream("matrix"));
MP_ASSERT_OK(graph_->WaitUntilDone());
graph_.reset();
}
}
TEST_F(TensorConverterCalculatorTest, RandomMatrixRowMajor) {
for (int size_index = 0; size_index < kNumSizes; ++size_index) {
const int num_rows = sizes[size_index][0];
const int num_columns = sizes[size_index][1];
// Run the calculator and verify that one output is generated.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "matrix"
node {
calculator: "TensorConverterCalculator"
input_stream: "MATRIX:matrix"
output_stream: "TENSORS:tensor"
options {
[mediapipe.TensorConverterCalculatorOptions.ext] {
row_major_matrix: true
}
}
}
)");
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
graph_ = absl::make_unique<CalculatorGraph>();
MP_ASSERT_OK(graph_->Initialize(graph_config));
MP_ASSERT_OK(graph_->StartRun({}));
// Push the tensor into the graph.
AddRandomMatrix(num_rows, num_columns, kSeed, /*row_major_matrix=*/true);
    // Wait until the calculator is done processing.
MP_ASSERT_OK(graph_->WaitUntilIdle());
EXPECT_EQ(1, output_packets.size());
// Get and process results.
const std::vector<Tensor>& tensor_vec =
output_packets[0].Get<std::vector<Tensor>>();
EXPECT_EQ(1, tensor_vec.size());
const Tensor* tensor = &tensor_vec[0];
EXPECT_EQ(Tensor::ElementType::kFloat32, tensor->element_type());
// Verify that the data is correct.
RandomEngine random(kSeed);
std::uniform_real_distribution<> uniform_dist(0, 1.0);
auto view = tensor->GetCpuReadView();
auto tensor_buffer = view.buffer<float>();
for (int i = 0; i < num_rows * num_columns; ++i) {
const float expected = uniform_dist(random);
EXPECT_EQ(expected, tensor_buffer[i]) << "at i = " << i;
}
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph_->CloseInputStream("matrix"));
MP_ASSERT_OK(graph_->WaitUntilDone());
graph_.reset();
}
}
TEST_F(TensorConverterCalculatorTest, CustomDivAndSub) {
CalculatorGraph graph;
// Run the calculator and verify that one output is generated.
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
input_stream: "input_image"
node {
calculator: "TensorConverterCalculator"
input_stream: "IMAGE:input_image"
output_stream: "TENSORS:tensor"
options {
[mediapipe.TensorConverterCalculatorOptions.ext] {
row_major_matrix: true
use_custom_normalization: true
custom_div: 2.0
custom_sub: 33.0
}
}
}
)");
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
cv::Mat mat = ::mediapipe::formats::MatView(input_image.get());
mat.at<uint8>(0, 0) = 200;
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image", Adopt(input_image.release()).At(Timestamp(0))));
  // Wait until the calculator is done processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
// Get and process results.
const std::vector<Tensor>& tensor_vec =
output_packets[0].Get<std::vector<Tensor>>();
EXPECT_EQ(1, tensor_vec.size());
const Tensor* tensor = &tensor_vec[0];
EXPECT_EQ(Tensor::ElementType::kFloat32, tensor->element_type());
auto view = tensor->GetCpuReadView();
EXPECT_FLOAT_EQ(67.0f, *view.buffer<float>());
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
std::vector<std::pair<float, float>> range_values = {
std::make_pair(0.0, 1.0), std::make_pair(-1.0, 1.0),
std::make_pair(-0.5, 0.5)};
for (std::pair<float, float> range : range_values) {
CalculatorGraph graph;
CalculatorGraphConfig graph_config =
::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(R"(
input_stream: "input_image"
node {
calculator: "TensorConverterCalculator"
input_stream: "IMAGE:input_image"
output_stream: "TENSORS:tensor"
options {
[mediapipe.TensorConverterCalculatorOptions.ext] {
output_tensor_float_range {
min: $0
max: $1
}
}
}
}
)",
/*$0=*/range.first,
/*$1=*/range.second));
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
cv::Mat mat = ::mediapipe::formats::MatView(input_image.get());
mat.at<uint8>(0, 0) = 200;
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image", Adopt(input_image.release()).At(Timestamp(0))));
// Wait until the calculator finishes processing.
MP_ASSERT_OK(graph.WaitUntilIdle());
EXPECT_THAT(output_packets.size(), Eq(1));
// Get and process results.
const std::vector<Tensor>& tensor_vec =
output_packets[0].Get<std::vector<Tensor>>();
EXPECT_THAT(tensor_vec.size(), Eq(1));
const Tensor* tensor = &tensor_vec[0];
// Calculate the expected normalized value:
float normalized_value =
range.first + (200 * (range.second - range.first)) / 255.0;
EXPECT_THAT(tensor->element_type(), Eq(Tensor::ElementType::kFloat32));
auto view = tensor->GetCpuReadView();
float dataf = *view.buffer<float>();
EXPECT_THAT(
normalized_value,
testing::FloatNear(dataf, 2.0f * std::abs(dataf) *
std::numeric_limits<float>::epsilon()));
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
}
} // namespace mediapipe

View File

@ -0,0 +1,197 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <unordered_map>
#include <vector>
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "mediapipe/calculators/tensor/tensors_to_classification_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/util/resource_util.h"
#if defined(MEDIAPIPE_MOBILE)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/helpers.h"
#else
#include "mediapipe/framework/port/file_helpers.h"
#endif
namespace mediapipe {
// Convert result tensors from classification models into MediaPipe
// classifications.
//
// Input:
// TENSORS - Vector of Tensors of type kFloat32 containing one
//            tensor, the size of which must be (1, num_classes).
// Output:
// CLASSIFICATIONS - Result MediaPipe ClassificationList. The score and index
// fields of each classification are set, while the label
// field is only set if label_map_path is provided.
//
// Usage example:
// node {
// calculator: "TensorsToClassificationCalculator"
// input_stream: "TENSORS:tensors"
// output_stream: "CLASSIFICATIONS:classifications"
// options: {
// [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
// num_classes: 1024
// min_score_threshold: 0.1
// label_map_path: "labelmap.txt"
// }
// }
// }
class TensorsToClassificationCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
private:
::mediapipe::TensorsToClassificationCalculatorOptions options_;
int top_k_ = 0;
std::unordered_map<int, std::string> label_map_;
bool label_map_loaded_ = false;
};
REGISTER_CALCULATOR(TensorsToClassificationCalculator);
::mediapipe::Status TensorsToClassificationCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(!cc->Inputs().GetTags().empty());
RET_CHECK(!cc->Outputs().GetTags().empty());
if (cc->Inputs().HasTag("TENSORS")) {
cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>();
}
if (cc->Outputs().HasTag("CLASSIFICATIONS")) {
cc->Outputs().Tag("CLASSIFICATIONS").Set<ClassificationList>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToClassificationCalculator::Open(
CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ =
cc->Options<::mediapipe::TensorsToClassificationCalculatorOptions>();
top_k_ = options_.top_k();
if (options_.has_label_map_path()) {
std::string string_path;
ASSIGN_OR_RETURN(string_path,
PathToResourceAsFile(options_.label_map_path()));
std::string label_map_string;
MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string));
std::istringstream stream(label_map_string);
std::string line;
int i = 0;
while (std::getline(stream, line)) {
label_map_[i++] = line;
}
label_map_loaded_ = true;
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToClassificationCalculator::Process(
CalculatorContext* cc) {
const auto& input_tensors =
cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>();
RET_CHECK_EQ(input_tensors.size(), 1);
int num_classes = input_tensors[0].shape().num_elements();
if (options_.binary_classification()) {
RET_CHECK_EQ(num_classes, 1);
// Number of classes for binary classification.
num_classes = 2;
}
if (label_map_loaded_) {
RET_CHECK_EQ(num_classes, label_map_.size());
}
auto view = input_tensors[0].GetCpuReadView();
auto raw_scores = view.buffer<float>();
auto classification_list = absl::make_unique<ClassificationList>();
if (options_.binary_classification()) {
Classification* class_first = classification_list->add_classification();
Classification* class_second = classification_list->add_classification();
class_first->set_index(0);
class_second->set_index(1);
class_first->set_score(raw_scores[0]);
class_second->set_score(1. - raw_scores[0]);
if (label_map_loaded_) {
class_first->set_label(label_map_[0]);
class_second->set_label(label_map_[1]);
}
} else {
for (int i = 0; i < num_classes; ++i) {
if (options_.has_min_score_threshold() &&
raw_scores[i] < options_.min_score_threshold()) {
continue;
}
Classification* classification =
classification_list->add_classification();
classification->set_index(i);
classification->set_score(raw_scores[i]);
if (label_map_loaded_) {
classification->set_label(label_map_[i]);
}
}
}
// Note that partial_sort will raise an error when top_k_ >
// classification_list->classification_size().
CHECK_GE(classification_list->classification_size(), top_k_);
auto raw_classification_list = classification_list->mutable_classification();
if (top_k_ > 0 && classification_list->classification_size() >= top_k_) {
std::partial_sort(raw_classification_list->begin(),
raw_classification_list->begin() + top_k_,
raw_classification_list->end(),
[](const Classification a, const Classification b) {
return a.score() > b.score();
});
// Resizes the underlying list to have only top_k_ classifications.
raw_classification_list->DeleteSubrange(
top_k_, raw_classification_list->size() - top_k_);
}
cc->Outputs()
.Tag("CLASSIFICATIONS")
.Add(classification_list.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToClassificationCalculator::Close(
CalculatorContext* cc) {
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

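For readers skimming the top-k handling above, the partial_sort-then-truncate pattern can be shown in isolation. A minimal standalone sketch, using a plain Entry struct as a stand-in for the Classification proto:

#include <algorithm>
#include <iostream>
#include <vector>

struct Entry {  // Illustrative stand-in for the Classification proto.
  int index;
  float score;
};

// Keeps only the top_k highest-scoring entries, mirroring the
// std::partial_sort + DeleteSubrange step in the calculator above.
void KeepTopK(std::vector<Entry>* entries, int top_k) {
  if (top_k <= 0 || static_cast<int>(entries->size()) < top_k) return;
  std::partial_sort(
      entries->begin(), entries->begin() + top_k, entries->end(),
      [](const Entry& a, const Entry& b) { return a.score > b.score; });
  entries->resize(top_k);
}

int main() {
  std::vector<Entry> entries = {{0, 0.1f}, {1, 0.9f}, {2, 0.5f}, {3, 0.7f}};
  KeepTopK(&entries, 2);
  for (const Entry& e : entries) {
    std::cout << e.index << ": " << e.score << "\n";  // 1: 0.9 then 3: 0.7
  }
  return 0;
}
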
View File

@ -0,0 +1,41 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the TensorsToClassificationCalculator.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message TensorsToClassificationCalculatorOptions {
extend .mediapipe.CalculatorOptions {
optional TensorsToClassificationCalculatorOptions ext = 335742638;
}
// Score threshold for preserving the class.
optional float min_score_threshold = 1;
// Number of highest scoring labels to output. If top_k is not positive then
// all labels are used.
optional int32 top_k = 2;
// Path to a label map file for getting the actual name of class ids.
optional string label_map_path = 3;
// Whether the input is a single float for binary classification.
// When true, only a single float is expected in the input tensor and the
// label map, if provided, is expected to have exactly two labels.
// The single score (float) represents the probability of the first label, and
// 1 - score is the probability of the second label.
optional bool binary_classification = 4;
}

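The binary_classification option above expands one sigmoid output into two complementary scores. A tiny illustrative sketch of that expansion (plain C++; the function name is not from MediaPipe):

#include <iostream>
#include <utility>

// Expands a single sigmoid score s into (P(first label), P(second label))
// = (s, 1 - s), as described for the binary_classification option.
std::pair<float, float> ExpandBinaryScore(float s) { return {s, 1.0f - s}; }

int main() {
  const auto p = ExpandBinaryScore(0.8f);
  std::cout << p.first << " " << p.second << "\n";  // 0.8 0.2
  return 0;
}
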
View File

@ -0,0 +1,174 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "absl/memory/memory.h"
#include "mediapipe/calculators/tensor/tensors_to_classification_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
using ::mediapipe::ParseTextProtoOrDie;
using Node = ::mediapipe::CalculatorGraphConfig::Node;
class TensorsToClassificationCalculatorTest : public ::testing::Test {
protected:
void BuildGraph(mediapipe::CalculatorRunner* runner,
const std::vector<float>& scores) {
auto tensors = absl::make_unique<std::vector<Tensor>>();
tensors->emplace_back(
Tensor::ElementType::kFloat32,
Tensor::Shape{1, 1, static_cast<int>(scores.size()), 1});
auto view = tensors->back().GetCpuWriteView();
float* tensor_buffer = view.buffer<float>();
ASSERT_NE(tensor_buffer, nullptr);
for (int i = 0; i < scores.size(); ++i) {
tensor_buffer[i] = scores[i];
}
int64 stream_timestamp = 0;
auto& input_stream_packets =
runner->MutableInputs()->Tag("TENSORS").packets;
input_stream_packets.push_back(
mediapipe::Adopt(tensors.release())
.At(mediapipe::Timestamp(stream_timestamp++)));
}
};
TEST_F(TensorsToClassificationCalculatorTest, CorrectOutput) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
calculator: "TensorsToClassificationCalculator"
input_stream: "TENSORS:tensors"
output_stream: "CLASSIFICATIONS:classifications"
options {
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {}
}
)"));
BuildGraph(&runner, {0, 0.5, 1});
MP_ASSERT_OK(runner.Run());
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
EXPECT_EQ(1, output_packets_.size());
const auto& classification_list =
output_packets_[0].Get<ClassificationList>();
EXPECT_EQ(3, classification_list.classification_size());
// Verify that the index and score fields are set correctly.
for (int i = 0; i < classification_list.classification_size(); ++i) {
EXPECT_EQ(i, classification_list.classification(i).index());
EXPECT_EQ(i * 0.5, classification_list.classification(i).score());
ASSERT_FALSE(classification_list.classification(i).has_label());
}
}
TEST_F(TensorsToClassificationCalculatorTest, CorrectOutputWithLabelMapPath) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
calculator: "TensorsToClassificationCalculator"
input_stream: "TENSORS:tensors"
output_stream: "CLASSIFICATIONS:classifications"
options {
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
label_map_path: "mediapipe/calculators/tensor/testdata/labelmap.txt"
}
}
)"));
BuildGraph(&runner, {0, 0.5, 1});
MP_ASSERT_OK(runner.Run());
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
EXPECT_EQ(1, output_packets_.size());
const auto& classification_list =
output_packets_[0].Get<ClassificationList>();
EXPECT_EQ(3, classification_list.classification_size());
// Verify that the label field is set.
for (int i = 0; i < classification_list.classification_size(); ++i) {
EXPECT_EQ(i, classification_list.classification(i).index());
EXPECT_EQ(i * 0.5, classification_list.classification(i).score());
ASSERT_TRUE(classification_list.classification(i).has_label());
}
}
TEST_F(TensorsToClassificationCalculatorTest,
CorrectOutputWithLabelMinScoreThreshold) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
calculator: "TensorsToClassificationCalculator"
input_stream: "TENSORS:tensors"
output_stream: "CLASSIFICATIONS:classifications"
options {
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
min_score_threshold: 0.6
}
}
)"));
BuildGraph(&runner, {0, 0.5, 1});
MP_ASSERT_OK(runner.Run());
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
EXPECT_EQ(1, output_packets_.size());
const auto& classification_list =
output_packets_[0].Get<ClassificationList>();
// Verify that the low score labels are filtered out.
EXPECT_EQ(1, classification_list.classification_size());
EXPECT_EQ(1, classification_list.classification(0).score());
}
TEST_F(TensorsToClassificationCalculatorTest, CorrectOutputWithTopK) {
mediapipe::CalculatorRunner runner(ParseTextProtoOrDie<Node>(R"(
calculator: "TensorsToClassificationCalculator"
input_stream: "TENSORS:tensors"
output_stream: "CLASSIFICATIONS:classifications"
options {
[mediapipe.TensorsToClassificationCalculatorOptions.ext] { top_k: 2 }
}
)"));
BuildGraph(&runner, {0, 0.5, 1});
MP_ASSERT_OK(runner.Run());
const auto& output_packets_ = runner.Outputs().Tag("CLASSIFICATIONS").packets;
EXPECT_EQ(1, output_packets_.size());
const auto& classification_list =
output_packets_[0].Get<ClassificationList>();
// Verify that only the top 2 labels are kept.
EXPECT_EQ(2, classification_list.classification_size());
for (int i = 0; i < classification_list.classification_size(); ++i) {
EXPECT_EQ((classification_list.classification_size() - i) * 0.5,
classification_list.classification(i).score());
}
}
} // namespace mediapipe

File diff suppressed because it is too large

View File

@ -0,0 +1,74 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the TensorsToDetectionsCalculator.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message TensorsToDetectionsCalculatorOptions {
extend .mediapipe.CalculatorOptions {
optional TensorsToDetectionsCalculatorOptions ext = 335742639;
}
// [Required] The number of output classes predicted by the detection model.
optional int32 num_classes = 1;
// [Required] The number of output boxes predicted by the detection model.
optional int32 num_boxes = 2;
// [Required] The number of output values per box predicted by the detection
// model. The values contain bounding boxes, keypoints, etc.
optional int32 num_coords = 3;
// The offset of keypoint coordinates in the location tensor.
optional int32 keypoint_coord_offset = 9;
// The number of predicted keypoints.
optional int32 num_keypoints = 10 [default = 0];
// The dimension of each keypoint, i.e. the number of values predicted for
// each keypoint.
optional int32 num_values_per_keypoint = 11 [default = 2];
// The offset of box coordinates in the location tensor.
optional int32 box_coord_offset = 12 [default = 0];
// Parameters for decoding SSD detection model.
optional float x_scale = 4 [default = 0.0];
optional float y_scale = 5 [default = 0.0];
optional float w_scale = 6 [default = 0.0];
optional float h_scale = 7 [default = 0.0];
optional bool apply_exponential_on_box_size = 13 [default = false];
// Whether to reverse the order of predicted x, y from output.
// If false, the order is [y_center, x_center, h, w], if true the order is
// [x_center, y_center, w, h].
optional bool reverse_output_order = 14 [default = false];
// The ids of classes that should be ignored during decoding the score for
// each predicted box.
repeated int32 ignore_classes = 8;
optional bool sigmoid_score = 15 [default = false];
optional float score_clipping_thresh = 16;
// Whether the detection coordinates from the input tensors should be flipped
// vertically (along the y-direction). This is useful, for example, when the
// input tensors represent detections defined with a coordinate system where
// the origin is at the top-left corner, whereas the desired detection
// representation has a bottom-left origin (e.g., in OpenGL).
optional bool flip_vertically = 18 [default = false];
// Score threshold for preserving decoded detections.
optional float min_score_thresh = 19;
}

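To make the scale and flag fields above concrete, here is a rough standalone sketch of anchor-relative SSD box decoding as such options are commonly applied. The Anchor struct and the exact arithmetic are assumptions for illustration and may differ from the calculator's actual implementation:

#include <cmath>
#include <iostream>

struct Anchor {  // Hypothetical anchor description (normalized coordinates).
  float x_center, y_center, w, h;
};

struct Box {
  float x_center, y_center, w, h;
};

// Decodes one raw prediction relative to its anchor. raw[] is assumed to hold
// [y_center, x_center, h, w] unless reverse_output_order is true, in which
// case it holds [x_center, y_center, w, h].
Box DecodeBox(const float raw[4], const Anchor& a, float x_scale, float y_scale,
              float w_scale, float h_scale, bool reverse_output_order,
              bool apply_exponential_on_box_size) {
  const float rx = reverse_output_order ? raw[0] : raw[1];
  const float ry = reverse_output_order ? raw[1] : raw[0];
  const float rw = reverse_output_order ? raw[2] : raw[3];
  const float rh = reverse_output_order ? raw[3] : raw[2];
  Box box;
  box.x_center = rx / x_scale * a.w + a.x_center;
  box.y_center = ry / y_scale * a.h + a.y_center;
  if (apply_exponential_on_box_size) {
    box.w = std::exp(rw / w_scale) * a.w;
    box.h = std::exp(rh / h_scale) * a.h;
  } else {
    box.w = rw / w_scale * a.w;
    box.h = rh / h_scale * a.h;
  }
  return box;
}

int main() {
  const float raw[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  const Anchor a{0.5f, 0.5f, 0.2f, 0.2f};
  const Box b = DecodeBox(raw, a, 128.0f, 128.0f, 128.0f, 128.0f,
                          /*reverse_output_order=*/true,
                          /*apply_exponential_on_box_size=*/false);
  // With all-zero offsets the box collapses onto the anchor center with zero
  // size; the exponential variant would keep the anchor size instead.
  std::cout << b.x_center << " " << b.y_center << " " << b.w << " " << b.h << "\n";
  return 0;
}
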
View File

@ -0,0 +1,97 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
// A calculator for converting Tensors to a float or a float vector.
//
// Input:
// TENSORS - Vector of Tensors of type kFloat32. Only the first
// tensor will be used.
// Output:
// FLOAT(optional) - Converted single float number.
// FLOATS(optional) - Converted float vector.
//
// Note: To output the FLOAT stream, the input tensor must have size 1, i.e.
// contain exactly one float value.
//
// Usage example:
// node {
// calculator: "TensorsToFloatsCalculator"
// input_stream: "TENSORS:tensors"
// output_stream: "FLOATS:floats"
// }
class TensorsToFloatsCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(TensorsToFloatsCalculator);
::mediapipe::Status TensorsToFloatsCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag("TENSORS"));
RET_CHECK(cc->Outputs().HasTag("FLOATS") || cc->Outputs().HasTag("FLOAT"));
cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>();
if (cc->Outputs().HasTag("FLOATS")) {
cc->Outputs().Tag("FLOATS").Set<std::vector<float>>();
}
if (cc->Outputs().HasTag("FLOAT")) {
cc->Outputs().Tag("FLOAT").Set<float>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToFloatsCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToFloatsCalculator::Process(CalculatorContext* cc) {
RET_CHECK(!cc->Inputs().Tag("TENSORS").IsEmpty());
const auto& input_tensors =
cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>();
// TODO: Add option to specify which tensor to take from.
auto view = input_tensors[0].GetCpuReadView();
auto raw_floats = view.buffer<float>();
int num_values = input_tensors[0].shape().num_elements();
if (cc->Outputs().HasTag("FLOAT")) {
// TODO: Could add an index in the option to specify returning one
// value of a float array.
RET_CHECK_EQ(num_values, 1);
cc->Outputs().Tag("FLOAT").AddPacket(
MakePacket<float>(raw_floats[0]).At(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag("FLOATS")) {
auto output_floats = absl::make_unique<std::vector<float>>(
raw_floats, raw_floats + num_values);
cc->Outputs().Tag("FLOATS").Add(output_floats.release(),
cc->InputTimestamp());
}
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,250 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/tensor/tensors_to_landmarks_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
// A calculator for converting Tensors from regression models into landmarks.
// Note that if the landmarks in the tensor have more than 5 dimensions, only
// the first 5 dimensions will be converted to [x, y, z, visibility, presence].
//
// Input:
// TENSORS - Vector of Tensors of type kFloat32. Only the first tensor will be
// used. The size of the values must be (num_dimension x num_landmarks).
//
// FLIP_HORIZONTALLY (optional): Whether to flip landmarks horizontally or
// not. Overrides corresponding side packet and/or field in the calculator
// options.
//
// FLIP_VERTICALLY (optional): Whether to flip landmarks vertically or not.
// Overrides corresponding side packet and/or field in the calculator options.
//
// Input side packet:
// FLIP_HORIZONTALLY (optional): Whether to flip landmarks horizontally or
// not. Overrides the corresponding field in the calculator options.
//
// FLIP_VERTICALLY (optional): Whether to flip landmarks vertically or not.
// Overrides the corresponding field in the calculator options.
//
// Output:
// LANDMARKS(optional) - Result MediaPipe landmarks.
// NORM_LANDMARKS(optional) - Result MediaPipe normalized landmarks.
//
// Notes:
// To output normalized landmarks, user must provide the original input image
// size to the model using calculator option input_image_width and
// input_image_height.
// Usage example:
// node {
// calculator: "TensorsToLandmarksCalculator"
// input_stream: "TENSORS:landmark_tensors"
// output_stream: "LANDMARKS:landmarks"
// output_stream: "NORM_LANDMARKS:landmarks"
// options: {
// [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
// num_landmarks: 21
//
// input_image_width: 256
// input_image_height: 256
// }
// }
// }
class TensorsToLandmarksCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
::mediapipe::Status LoadOptions(CalculatorContext* cc);
int num_landmarks_ = 0;
bool flip_vertically_ = false;
bool flip_horizontally_ = false;
::mediapipe::TensorsToLandmarksCalculatorOptions options_;
};
REGISTER_CALCULATOR(TensorsToLandmarksCalculator);
::mediapipe::Status TensorsToLandmarksCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(!cc->Inputs().GetTags().empty());
RET_CHECK(!cc->Outputs().GetTags().empty());
if (cc->Inputs().HasTag("TENSORS")) {
cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>();
}
if (cc->Inputs().HasTag("FLIP_HORIZONTALLY")) {
cc->Inputs().Tag("FLIP_HORIZONTALLY").Set<bool>();
}
if (cc->Inputs().HasTag("FLIP_VERTICALLY")) {
cc->Inputs().Tag("FLIP_VERTICALLY").Set<bool>();
}
if (cc->InputSidePackets().HasTag("FLIP_HORIZONTALLY")) {
cc->InputSidePackets().Tag("FLIP_HORIZONTALLY").Set<bool>();
}
if (cc->InputSidePackets().HasTag("FLIP_VERTICALLY")) {
cc->InputSidePackets().Tag("FLIP_VERTICALLY").Set<bool>();
}
if (cc->Outputs().HasTag("LANDMARKS")) {
cc->Outputs().Tag("LANDMARKS").Set<LandmarkList>();
}
if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
cc->Outputs().Tag("NORM_LANDMARKS").Set<NormalizedLandmarkList>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToLandmarksCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
MP_RETURN_IF_ERROR(LoadOptions(cc));
if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
RET_CHECK(options_.has_input_image_height() &&
options_.has_input_image_width())
<< "Must provide input with/height for getting normalized landmarks.";
}
if (cc->Outputs().HasTag("LANDMARKS") &&
(options_.flip_vertically() || options_.flip_horizontally() ||
cc->InputSidePackets().HasTag("FLIP_HORIZONTALLY") ||
cc->InputSidePackets().HasTag("FLIP_VERTICALLY"))) {
RET_CHECK(options_.has_input_image_height() &&
options_.has_input_image_width())
<< "Must provide input with/height for using flip_vertically option "
"when outputing landmarks in absolute coordinates.";
}
flip_horizontally_ =
cc->InputSidePackets().HasTag("FLIP_HORIZONTALLY")
? cc->InputSidePackets().Tag("FLIP_HORIZONTALLY").Get<bool>()
: options_.flip_horizontally();
flip_vertically_ =
cc->InputSidePackets().HasTag("FLIP_VERTICALLY")
? cc->InputSidePackets().Tag("FLIP_VERTICALLY").Get<bool>()
: options_.flip_vertically();
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToLandmarksCalculator::Process(
CalculatorContext* cc) {
// Override values if specified so.
if (cc->Inputs().HasTag("FLIP_HORIZONTALLY") &&
!cc->Inputs().Tag("FLIP_HORIZONTALLY").IsEmpty()) {
flip_horizontally_ = cc->Inputs().Tag("FLIP_HORIZONTALLY").Get<bool>();
}
if (cc->Inputs().HasTag("FLIP_VERTICALLY") &&
!cc->Inputs().Tag("FLIP_VERTICALLY").IsEmpty()) {
flip_vertically_ = cc->Inputs().Tag("FLIP_VERTICALLY").Get<bool>();
}
if (cc->Inputs().Tag("TENSORS").IsEmpty()) {
return ::mediapipe::OkStatus();
}
const auto& input_tensors =
cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>();
int num_values = input_tensors[0].shape().num_elements();
const int num_dimensions = num_values / num_landmarks_;
CHECK_GT(num_dimensions, 0);
auto view = input_tensors[0].GetCpuReadView();
auto raw_landmarks = view.buffer<float>();
LandmarkList output_landmarks;
for (int ld = 0; ld < num_landmarks_; ++ld) {
const int offset = ld * num_dimensions;
Landmark* landmark = output_landmarks.add_landmark();
if (flip_horizontally_) {
landmark->set_x(options_.input_image_width() - raw_landmarks[offset]);
} else {
landmark->set_x(raw_landmarks[offset]);
}
if (num_dimensions > 1) {
if (flip_vertically_) {
landmark->set_y(options_.input_image_height() -
raw_landmarks[offset + 1]);
} else {
landmark->set_y(raw_landmarks[offset + 1]);
}
}
if (num_dimensions > 2) {
landmark->set_z(raw_landmarks[offset + 2]);
}
if (num_dimensions > 3) {
landmark->set_visibility(raw_landmarks[offset + 3]);
}
if (num_dimensions > 4) {
landmark->set_presence(raw_landmarks[offset + 4]);
}
}
// Output normalized landmarks if required.
if (cc->Outputs().HasTag("NORM_LANDMARKS")) {
NormalizedLandmarkList output_norm_landmarks;
for (int i = 0; i < output_landmarks.landmark_size(); ++i) {
const Landmark& landmark = output_landmarks.landmark(i);
NormalizedLandmark* norm_landmark = output_norm_landmarks.add_landmark();
norm_landmark->set_x(landmark.x() / options_.input_image_width());
norm_landmark->set_y(landmark.y() / options_.input_image_height());
// Scale the Z coordinate in the same way as X, then apply the additional
// uniform normalization factor (normalize_z).
norm_landmark->set_z(landmark.z() / options_.input_image_width() /
options_.normalize_z());
norm_landmark->set_visibility(landmark.visibility());
norm_landmark->set_presence(landmark.presence());
}
cc->Outputs()
.Tag("NORM_LANDMARKS")
.AddPacket(MakePacket<NormalizedLandmarkList>(output_norm_landmarks)
.At(cc->InputTimestamp()));
}
// Output absolute landmarks.
if (cc->Outputs().HasTag("LANDMARKS")) {
cc->Outputs()
.Tag("LANDMARKS")
.AddPacket(MakePacket<LandmarkList>(output_landmarks)
.At(cc->InputTimestamp()));
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TensorsToLandmarksCalculator::LoadOptions(
CalculatorContext* cc) {
// Get calculator options specified in the graph.
options_ = cc->Options<::mediapipe::TensorsToLandmarksCalculatorOptions>();
RET_CHECK(options_.has_num_landmarks());
num_landmarks_ = options_.num_landmarks();
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

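As a compact illustration of the coordinate handling above (flip in pixel space, then divide by the input image size, with the extra normalize_z factor applied to Z), here is a standalone sketch with illustrative struct names:

#include <iostream>

struct Point3 {  // Illustrative stand-in for a landmark.
  float x, y, z;
};

// Flips a landmark in pixel space, as the flip_horizontally /
// flip_vertically handling above does.
Point3 Flip(Point3 lm, int image_width, int image_height,
            bool flip_horizontally, bool flip_vertically) {
  if (flip_horizontally) lm.x = image_width - lm.x;
  if (flip_vertically) lm.y = image_height - lm.y;
  return lm;
}

// Normalizes by image size; Z is divided by the image width and by the
// additional normalize_z factor, as in the NORM_LANDMARKS output.
Point3 Normalize(Point3 lm, int image_width, int image_height,
                 float normalize_z) {
  return {lm.x / image_width, lm.y / image_height,
          lm.z / image_width / normalize_z};
}

int main() {
  const Point3 lm{64.0f, 192.0f, 12.8f};
  const Point3 flipped =
      Flip(lm, 256, 256, /*flip_horizontally=*/true, /*flip_vertically=*/false);
  const Point3 norm = Normalize(flipped, 256, 256, /*normalize_z=*/1.0f);
  std::cout << norm.x << " " << norm.y << " " << norm.z << "\n";  // 0.75 0.75 0.05
  return 0;
}
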
View File

@ -0,0 +1,54 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the TensorsToLandmarksCalculator.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message TensorsToLandmarksCalculatorOptions {
extend .mediapipe.CalculatorOptions {
optional TensorsToLandmarksCalculatorOptions ext = 335742640;
}
// [Required] Number of landmarks from the output of the model.
optional int32 num_landmarks = 1;
// Size of the input image for the model. These options are used only when
// normalized landmarks are needed. Z coordinate is scaled as X assuming
// a weak perspective projection camera model.
optional int32 input_image_width = 2;
optional int32 input_image_height = 3;
// Whether the detection coordinates from the input tensors should be flipped
// vertically (along the y-direction). This is useful, for example, when the
// input tensors represent detections defined with a coordinate system where
// the origin is at the top-left corner, whereas the desired detection
// representation has a bottom-left origin (e.g., in OpenGL).
optional bool flip_vertically = 4 [default = false];
// Whether the detection coordinates from the input tensors should be flipped
// horizontally (along the x-direction). This is useful, for example, when the
// input image is horizontally flipped in ImageTransformationCalculator
// beforehand.
optional bool flip_horizontally = 6 [default = false];
// A value that Z coordinates should be divided by. This option is used only
// when normalized landmarks are needed. It is applied in addition to Z
// coordinate being re-scaled as X.
optional float normalize_z = 5 [default = 1.0];
}

Binary files not shown (9 binary files; 8 image previews of 5.5 KiB, 11 KiB, 8.1 KiB, 15 KiB, 50 KiB, 48 KiB, 49 KiB, and 7.2 KiB).

View File

@ -0,0 +1,3 @@
classA
classB
classC

View File

@ -84,7 +84,7 @@ namespace mpms = ::mediapipe::mediasequence;
// node {
// calculator: "UnpackMediaSequenceCalculator"
// input_side_packet: "SEQUENCE_EXAMPLE:example_input_side_packet"
// input_side_packet: "ROOT_DIRECTORY:path_to_dataset_root_directory"
// input_side_packet: "DATASET_ROOT:path_to_dataset_root_directory"
// output_side_packet: "DATA_PATH:full_path_to_data_element"
// output_side_packet: "RESAMPLER_OPTIONS:packet_resampler_options"
// options {

View File

@ -404,12 +404,7 @@ bool ShouldUseGpu(CC* cc) {
MP_RETURN_IF_ERROR(LoadDelegate(cc));
#endif
} else {
// TODO: why only on these platforms?
// It seems that the XNNPACK delegate fails to load on Linux.
#if defined(__EMSCRIPTEN__) || defined(MEDIAPIPE_ANDROID) || \
defined(MEDIAPIPE_IOS)
MP_RETURN_IF_ERROR(LoadDelegate(cc));
#endif // __EMSCRIPTEN__ || MEDIAPIPE_ANDROID || MEDIAPIPE_IOS
}
return ::mediapipe::OkStatus();
}

View File

@ -929,6 +929,7 @@ cc_library(
deps = [
":collection_has_min_size_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:ret_check",
@ -1043,3 +1044,26 @@ cc_library(
],
alwayslink = 1,
)
mediapipe_proto_library(
name = "logic_calculator_proto",
srcs = ["logic_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "logic_calculator",
srcs = ["logic_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":logic_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)

View File

@ -17,18 +17,24 @@
#include <vector>
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
namespace mediapipe {
typedef CollectionHasMinSizeCalculator<std::vector<::mediapipe::NormalizedRect>>
typedef CollectionHasMinSizeCalculator<std::vector<mediapipe::NormalizedRect>>
NormalizedRectVectorHasMinSizeCalculator;
REGISTER_CALCULATOR(NormalizedRectVectorHasMinSizeCalculator);
typedef CollectionHasMinSizeCalculator<
std::vector<::mediapipe::NormalizedLandmarkList>>
std::vector<mediapipe::NormalizedLandmarkList>>
NormalizedLandmarkListVectorHasMinSizeCalculator;
REGISTER_CALCULATOR(NormalizedLandmarkListVectorHasMinSizeCalculator);
typedef CollectionHasMinSizeCalculator<
std::vector<mediapipe::ClassificationList>>
ClassificationListVectorHasMinSizeCalculator;
REGISTER_CALCULATOR(ClassificationListVectorHasMinSizeCalculator);
} // namespace mediapipe

View File

@ -14,6 +14,7 @@
#include "mediapipe/calculators/util/detections_to_rects_calculator.h"
#include <cmath>
#include <limits>
#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
@ -36,19 +37,70 @@ constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kRectsTag[] = "RECTS";
constexpr char kNormRectsTag[] = "NORM_RECTS";
constexpr float kMinFloat = std::numeric_limits<float>::lowest();
constexpr float kMaxFloat = std::numeric_limits<float>::max();
::mediapipe::Status NormRectFromKeyPoints(const LocationData& location_data,
NormalizedRect* rect) {
RET_CHECK_GT(location_data.relative_keypoints_size(), 1)
<< "2 or more key points required to calculate a rect.";
float xmin = kMaxFloat;
float ymin = kMaxFloat;
float xmax = kMinFloat;
float ymax = kMinFloat;
for (int i = 0; i < location_data.relative_keypoints_size(); ++i) {
const auto& kp = location_data.relative_keypoints(i);
xmin = std::min(xmin, kp.x());
ymin = std::min(ymin, kp.y());
xmax = std::max(xmax, kp.x());
ymax = std::max(ymax, kp.y());
}
rect->set_x_center((xmin + xmax) / 2);
rect->set_y_center((ymin + ymax) / 2);
rect->set_width(xmax - xmin);
rect->set_height(ymax - ymin);
return ::mediapipe::OkStatus();
}
template <class B, class R>
void RectFromBox(B box, R* rect) {
rect->set_x_center(box.xmin() + box.width() / 2);
rect->set_y_center(box.ymin() + box.height() / 2);
rect->set_width(box.width());
rect->set_height(box.height());
}
} // namespace
::mediapipe::Status DetectionsToRectsCalculator::DetectionToRect(
const Detection& detection, const DetectionSpec& detection_spec,
Rect* rect) {
const LocationData location_data = detection.location_data();
RET_CHECK(location_data.format() == LocationData::BOUNDING_BOX)
<< "Only Detection with formats of BOUNDING_BOX can be converted to Rect";
const LocationData::BoundingBox bounding_box = location_data.bounding_box();
rect->set_x_center(bounding_box.xmin() + bounding_box.width() / 2);
rect->set_y_center(bounding_box.ymin() + bounding_box.height() / 2);
rect->set_width(bounding_box.width());
rect->set_height(bounding_box.height());
switch (options_.conversion_mode()) {
case mediapipe::DetectionsToRectsCalculatorOptions_ConversionMode_DEFAULT:
case mediapipe::
DetectionsToRectsCalculatorOptions_ConversionMode_USE_BOUNDING_BOX: {
RET_CHECK(location_data.format() == LocationData::BOUNDING_BOX)
<< "Only Detection with formats of BOUNDING_BOX can be converted to "
"Rect";
RectFromBox(location_data.bounding_box(), rect);
break;
}
case mediapipe::
DetectionsToRectsCalculatorOptions_ConversionMode_USE_KEYPOINTS: {
RET_CHECK(detection_spec.image_size.has_value())
<< "Rect with absolute coordinates calculation requires image size.";
const int width = detection_spec.image_size->first;
const int height = detection_spec.image_size->second;
NormalizedRect norm_rect;
MP_RETURN_IF_ERROR(NormRectFromKeyPoints(location_data, &norm_rect));
rect->set_x_center(std::round(norm_rect.x_center() * width));
rect->set_y_center(std::round(norm_rect.y_center() * height));
rect->set_width(std::round(norm_rect.width() * width));
rect->set_height(std::round(norm_rect.height() * height));
break;
}
}
return ::mediapipe::OkStatus();
}
@ -56,15 +108,22 @@ constexpr char kNormRectsTag[] = "NORM_RECTS";
const Detection& detection, const DetectionSpec& detection_spec,
NormalizedRect* rect) {
const LocationData location_data = detection.location_data();
RET_CHECK(location_data.format() == LocationData::RELATIVE_BOUNDING_BOX)
<< "Only Detection with formats of RELATIVE_BOUNDING_BOX can be "
"converted to NormalizedRect";
const LocationData::RelativeBoundingBox bounding_box =
location_data.relative_bounding_box();
rect->set_x_center(bounding_box.xmin() + bounding_box.width() / 2);
rect->set_y_center(bounding_box.ymin() + bounding_box.height() / 2);
rect->set_width(bounding_box.width());
rect->set_height(bounding_box.height());
switch (options_.conversion_mode()) {
case mediapipe::DetectionsToRectsCalculatorOptions_ConversionMode_DEFAULT:
case mediapipe::
DetectionsToRectsCalculatorOptions_ConversionMode_USE_BOUNDING_BOX: {
RET_CHECK(location_data.format() == LocationData::RELATIVE_BOUNDING_BOX)
<< "Only Detection with formats of RELATIVE_BOUNDING_BOX can be "
"converted to NormalizedRect";
RectFromBox(location_data.relative_bounding_box(), rect);
break;
}
case mediapipe::
DetectionsToRectsCalculatorOptions_ConversionMode_USE_KEYPOINTS: {
MP_RETURN_IF_ERROR(NormRectFromKeyPoints(location_data, rect));
break;
}
}
return ::mediapipe::OkStatus();
}

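The keypoint path above reduces to an axis-aligned bounding box over the keypoints. A standalone sketch of that computation, scaled to pixels with the same numbers the tests below use:

#include <algorithm>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

struct NormRect {  // Illustrative stand-in for NormalizedRect.
  float x_center, y_center, width, height;
};

// Bounding box over normalized keypoints, as in NormRectFromKeyPoints above.
NormRect RectFromKeyPoints(const std::vector<std::pair<float, float>>& kps) {
  float xmin = std::numeric_limits<float>::max();
  float ymin = std::numeric_limits<float>::max();
  float xmax = std::numeric_limits<float>::lowest();
  float ymax = std::numeric_limits<float>::lowest();
  for (const auto& kp : kps) {
    xmin = std::min(xmin, kp.first);
    ymin = std::min(ymin, kp.second);
    xmax = std::max(xmax, kp.first);
    ymax = std::max(ymax, kp.second);
  }
  return {(xmin + xmax) / 2, (ymin + ymax) / 2, xmax - xmin, ymax - ymin};
}

int main() {
  const NormRect r = RectFromKeyPoints({{0.25f, 0.25f}, {0.75f, 0.75f}});
  // Scaled to a 640x480 image this yields center (320, 240) and size
  // (320, 240), the values the DetectionKeyPointsToRect test expects.
  std::cout << r.x_center * 640 << " " << r.y_center * 480 << " "
            << r.width * 640 << " " << r.height * 480 << "\n";
  return 0;
}
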
View File

@ -35,4 +35,12 @@ message DetectionsToRectsCalculatorOptions {
// Whether to output a zero-rect (with origin and size both zero) when the
// input detection vector is empty.
optional bool output_zero_rect_for_empty_detections = 5;
enum ConversionMode {
DEFAULT = 0;
USE_BOUNDING_BOX = 1;
USE_KEYPOINTS = 2;
}
optional ConversionMode conversion_mode = 6;
}

View File

@ -12,6 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <memory>
#include <vector>
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
@ -26,6 +30,21 @@
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
MATCHER_P4(RectEq, x_center, y_center, width, height, "") {
return testing::Value(arg.x_center(), testing::Eq(x_center)) &&
testing::Value(arg.y_center(), testing::Eq(y_center)) &&
testing::Value(arg.width(), testing::Eq(width)) &&
testing::Value(arg.height(), testing::Eq(height));
}
MATCHER_P4(NormRectEq, x_center, y_center, width, height, "") {
return testing::Value(arg.x_center(), testing::FloatEq(x_center)) &&
testing::Value(arg.y_center(), testing::FloatEq(y_center)) &&
testing::Value(arg.width(), testing::FloatEq(width)) &&
testing::Value(arg.height(), testing::FloatEq(height));
}
Detection DetectionWithLocationData(int32 xmin, int32 ymin, int32 width,
int32 height) {
@ -39,6 +58,19 @@ Detection DetectionWithLocationData(int32 xmin, int32 ymin, int32 width,
return detection;
}
Detection DetectionWithKeyPoints(
const std::vector<std::pair<float, float>>& key_points) {
Detection detection;
LocationData* location_data = detection.mutable_location_data();
std::for_each(key_points.begin(), key_points.end(),
[location_data](std::pair<float, float> kp) {
auto* new_kp = location_data->add_relative_keypoints();
new_kp->set_x(kp.first);
new_kp->set_y(kp.second);
});
return detection;
}
Detection DetectionWithRelativeLocationData(double xmin, double ymin,
double width, double height) {
Detection detection;
@ -70,10 +102,61 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToRect) {
const std::vector<Packet>& output = runner.Outputs().Tag("RECT").packets;
ASSERT_EQ(1, output.size());
const auto& rect = output[0].Get<Rect>();
EXPECT_EQ(rect.width(), 300);
EXPECT_EQ(rect.height(), 400);
EXPECT_EQ(rect.x_center(), 250);
EXPECT_EQ(rect.y_center(), 400);
EXPECT_THAT(rect, RectEq(250, 400, 300, 400));
}
::mediapipe::StatusOr<Rect> RunDetectionKeyPointsToRectCalculation(
Detection detection, std::pair<int, int> image_size) {
CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "RECT:rect"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
conversion_mode: USE_KEYPOINTS
}
}
)"));
runner.MutableInputs()
->Tag("DETECTION")
.packets.push_back(MakePacket<Detection>(std::move(detection))
.At(Timestamp::PostStream()));
runner.MutableInputs()
->Tag("IMAGE_SIZE")
.packets.push_back(MakePacket<std::pair<int, int>>(image_size)
.At(Timestamp::PostStream()));
MP_RETURN_IF_ERROR(runner.Run());
const std::vector<Packet>& output = runner.Outputs().Tag("RECT").packets;
RET_CHECK_EQ(output.size(), 1);
return output[0].Get<Rect>();
}
TEST(DetectionsToRectsCalculatorTest, DetectionKeyPointsToRect) {
auto status_or_value = RunDetectionKeyPointsToRectCalculation(
/*detection=*/DetectionWithKeyPoints({{0.0f, 0.0f}, {1.0f, 1.0f}}),
/*image_size=*/{640, 480});
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(320, 240, 640, 480));
status_or_value = RunDetectionKeyPointsToRectCalculation(
/*detection=*/DetectionWithKeyPoints({{0.25f, 0.25f}, {0.75f, 0.75f}}),
/*image_size=*/{640, 480});
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(320, 240, 320, 240));
status_or_value = RunDetectionKeyPointsToRectCalculation(
/*detection=*/DetectionWithKeyPoints({{0.0f, 0.0f}, {0.5f, 0.5f}}),
/*image_size=*/{640, 480});
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(160, 120, 320, 240));
status_or_value = RunDetectionKeyPointsToRectCalculation(
/*detection=*/DetectionWithKeyPoints({{0.5f, 0.5f}, {1.0f, 1.0f}}),
/*image_size=*/{640, 480});
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(480, 360, 320, 240));
}
TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRect) {
@ -95,10 +178,56 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRect) {
const std::vector<Packet>& output = runner.Outputs().Tag("NORM_RECT").packets;
ASSERT_EQ(1, output.size());
const auto& rect = output[0].Get<NormalizedRect>();
EXPECT_FLOAT_EQ(rect.width(), 0.3);
EXPECT_FLOAT_EQ(rect.height(), 0.4);
EXPECT_FLOAT_EQ(rect.x_center(), 0.25);
EXPECT_FLOAT_EQ(rect.y_center(), 0.4);
EXPECT_THAT(rect, NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
}
::mediapipe::StatusOr<NormalizedRect>
RunDetectionKeyPointsToNormRectCalculation(Detection detection) {
CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:detection"
output_stream: "NORM_RECT:rect"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
conversion_mode: USE_KEYPOINTS
}
}
)"));
runner.MutableInputs()
->Tag("DETECTION")
.packets.push_back(MakePacket<Detection>(std::move(detection))
.At(Timestamp::PostStream()));
MP_RETURN_IF_ERROR(runner.Run());
const std::vector<Packet>& output = runner.Outputs().Tag("NORM_RECT").packets;
RET_CHECK_EQ(output.size(), 1);
return output[0].Get<NormalizedRect>();
}
TEST(DetectionsToRectsCalculatorTest, DetectionKeyPointsToNormalizedRect) {
NormalizedRect rect;
auto status_or_value = RunDetectionKeyPointsToNormRectCalculation(
/*detection=*/DetectionWithKeyPoints(
{{0.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 1.0f}}));
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.5f, 0.5f, 1.0f, 1.0f));
status_or_value = RunDetectionKeyPointsToNormRectCalculation(
/*detection=*/DetectionWithKeyPoints(
{{0.25f, 0.25f}, {0.75f, 0.25f}, {0.75f, 0.75f}}));
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.5f, 0.5f, 0.5f, 0.5f));
status_or_value = RunDetectionKeyPointsToNormRectCalculation(
/*detection=*/DetectionWithKeyPoints({{0.0f, 0.0f}, {0.5f, 0.5f}}));
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.25f, 0.25f, 0.5f, 0.5f));
status_or_value = RunDetectionKeyPointsToNormRectCalculation(
/*detection=*/DetectionWithKeyPoints({{0.5f, 0.5f}, {1.0f, 1.0f}}));
MP_ASSERT_OK(status_or_value);
EXPECT_THAT(status_or_value.ValueOrDie(), RectEq(0.75f, 0.75f, 0.5f, 0.5f));
}
TEST(DetectionsToRectsCalculatorTest, DetectionsToRect) {
@ -121,10 +250,7 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToRect) {
const std::vector<Packet>& output = runner.Outputs().Tag("RECT").packets;
ASSERT_EQ(1, output.size());
const auto& rect = output[0].Get<Rect>();
EXPECT_EQ(rect.width(), 300);
EXPECT_EQ(rect.height(), 400);
EXPECT_EQ(rect.x_center(), 250);
EXPECT_EQ(rect.y_center(), 400);
EXPECT_THAT(rect, RectEq(250, 400, 300, 400));
}
TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRect) {
@ -147,10 +273,7 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRect) {
const std::vector<Packet>& output = runner.Outputs().Tag("NORM_RECT").packets;
ASSERT_EQ(1, output.size());
const auto& rect = output[0].Get<NormalizedRect>();
EXPECT_FLOAT_EQ(rect.width(), 0.3);
EXPECT_FLOAT_EQ(rect.height(), 0.4);
EXPECT_FLOAT_EQ(rect.x_center(), 0.25);
EXPECT_FLOAT_EQ(rect.y_center(), 0.4);
EXPECT_THAT(rect, NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
}
TEST(DetectionsToRectsCalculatorTest, DetectionsToRects) {
@ -173,15 +296,9 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToRects) {
const std::vector<Packet>& output = runner.Outputs().Tag("RECTS").packets;
ASSERT_EQ(1, output.size());
const auto& rects = output[0].Get<std::vector<Rect>>();
EXPECT_EQ(rects.size(), 2);
EXPECT_EQ(rects[0].width(), 300);
EXPECT_EQ(rects[0].height(), 400);
EXPECT_EQ(rects[0].x_center(), 250);
EXPECT_EQ(rects[0].y_center(), 400);
EXPECT_EQ(rects[1].width(), 400);
EXPECT_EQ(rects[1].height(), 500);
EXPECT_EQ(rects[1].x_center(), 400);
EXPECT_EQ(rects[1].y_center(), 550);
ASSERT_EQ(rects.size(), 2);
EXPECT_THAT(rects[0], RectEq(250, 400, 300, 400));
EXPECT_THAT(rects[1], RectEq(400, 550, 400, 500));
}
TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRects) {
@ -205,15 +322,9 @@ TEST(DetectionsToRectsCalculatorTest, DetectionsToNormalizedRects) {
runner.Outputs().Tag("NORM_RECTS").packets;
ASSERT_EQ(1, output.size());
const auto& rects = output[0].Get<std::vector<NormalizedRect>>();
EXPECT_EQ(rects.size(), 2);
EXPECT_FLOAT_EQ(rects[0].width(), 0.3);
EXPECT_FLOAT_EQ(rects[0].height(), 0.4);
EXPECT_FLOAT_EQ(rects[0].x_center(), 0.25);
EXPECT_FLOAT_EQ(rects[0].y_center(), 0.4);
EXPECT_FLOAT_EQ(rects[1].width(), 0.4);
EXPECT_FLOAT_EQ(rects[1].height(), 0.5);
EXPECT_FLOAT_EQ(rects[1].x_center(), 0.4);
EXPECT_FLOAT_EQ(rects[1].y_center(), 0.55);
ASSERT_EQ(rects.size(), 2);
EXPECT_THAT(rects[0], NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
EXPECT_THAT(rects[1], NormRectEq(0.4f, 0.55f, 0.4f, 0.5f));
}
TEST(DetectionsToRectsCalculatorTest, DetectionToRects) {
@ -236,10 +347,7 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToRects) {
ASSERT_EQ(1, output.size());
const auto& rects = output[0].Get<std::vector<Rect>>();
EXPECT_EQ(rects.size(), 1);
EXPECT_EQ(rects[0].width(), 300);
EXPECT_EQ(rects[0].height(), 400);
EXPECT_EQ(rects[0].x_center(), 250);
EXPECT_EQ(rects[0].y_center(), 400);
EXPECT_THAT(rects[0], RectEq(250, 400, 300, 400));
}
TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRects) {
@ -262,11 +370,8 @@ TEST(DetectionsToRectsCalculatorTest, DetectionToNormalizedRects) {
runner.Outputs().Tag("NORM_RECTS").packets;
ASSERT_EQ(1, output.size());
const auto& rects = output[0].Get<std::vector<NormalizedRect>>();
EXPECT_EQ(rects.size(), 1);
EXPECT_FLOAT_EQ(rects[0].width(), 0.3);
EXPECT_FLOAT_EQ(rects[0].height(), 0.4);
EXPECT_FLOAT_EQ(rects[0].x_center(), 0.25);
EXPECT_FLOAT_EQ(rects[0].y_center(), 0.4);
ASSERT_EQ(rects.size(), 1);
EXPECT_THAT(rects[0], NormRectEq(0.25f, 0.4f, 0.3f, 0.4f));
}
TEST(DetectionsToRectsCalculatorTest, WrongInputToRect) {
@ -309,4 +414,5 @@ TEST(DetectionsToRectsCalculatorTest, WrongInputToNormalizedRect) {
"Only Detection with formats of RELATIVE_BOUNDING_BOX"));
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,105 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "mediapipe/calculators/util/logic_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
using mediapipe::LogicCalculatorOptions;
// A calculator to compute logical functions of bool inputs.
// With just one input, the output equals the input as expected.
//
// Inputs: One or more bool inputs, which may be input-stream-packets,
// input-side-packets, or options input-values.
//
// Outputs: One bool stream.
//
// Example config:
// node {
// calculator: "LogicCalculator"
// input_stream: "has_data"
// input_side_packet: "enable"
// input_stream: "is_valid"
// output_stream: "process_data"
// options {
// [mediapipe.LogicCalculatorOptions.ext] {
// op: AND
// input_value: true
// }
// }
// }
class LogicCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
for (int k = 0; k < cc->Inputs().NumEntries(""); ++k) {
cc->Inputs().Index(k).Set<bool>();
}
for (int k = 0; k < cc->InputSidePackets().NumEntries(""); ++k) {
cc->InputSidePackets().Index(k).Set<bool>();
}
RET_CHECK_GE(cc->Inputs().NumEntries("") +
cc->InputSidePackets().NumEntries("") +
cc->Options<LogicCalculatorOptions>().input_value_size(),
1);
RET_CHECK_EQ(cc->Outputs().NumEntries(""), 1);
cc->Outputs().Index(0).Set<bool>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
options_ = cc->Options<LogicCalculatorOptions>();
cc->SetOffset(TimestampDiff(0));
return ::mediapipe::OkStatus();
}
bool LogicalOp(bool b1, bool b2) {
switch (options_.op()) {
case LogicCalculatorOptions::AND:
return b1 && b2;
case LogicCalculatorOptions::OR:
return b1 || b2;
case LogicCalculatorOptions::XOR:
return b1 ^ b2;
}
return false;
}
::mediapipe::Status Process(CalculatorContext* cc) override {
bool result = options_.op() == LogicCalculatorOptions::AND;
for (int k = 0; k < options_.input_value_size(); ++k) {
result = LogicalOp(result, options_.input_value(k));
}
for (int k = 0; k < cc->Inputs().NumEntries(""); ++k) {
result = LogicalOp(result, cc->Inputs().Index(k).Value().Get<bool>());
}
for (int k = 0; k < cc->InputSidePackets().NumEntries(""); ++k) {
result = LogicalOp(result, cc->InputSidePackets().Index(k).Get<bool>());
}
if (options_.negate()) {
result = !result;
}
cc->Outputs().Index(0).Add(new bool(result), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
private:
LogicCalculatorOptions options_;
};
REGISTER_CALCULATOR(LogicCalculator);
} // namespace mediapipe

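A minimal standalone sketch of the reduction LogicCalculator performs (seed with the identity element, fold every bool input, then optionally negate); the enum and function names below are illustrative, not MediaPipe APIs:

#include <iostream>
#include <vector>

enum class Op { AND, OR, XOR };

// Folds the inputs with the chosen operation and optionally negates the
// result, mirroring LogicCalculator::Process above.
bool Evaluate(Op op, const std::vector<bool>& inputs, bool negate) {
  bool result = (op == Op::AND);  // Identity: true for AND, false for OR/XOR.
  for (const bool b : inputs) {
    switch (op) {
      case Op::AND: result = result && b; break;
      case Op::OR:  result = result || b; break;
      case Op::XOR: result = result != b; break;
    }
  }
  return negate ? !result : result;
}

int main() {
  std::cout << Evaluate(Op::AND, {true, true, false}, false) << "\n";  // 0
  std::cout << Evaluate(Op::OR,  {false, false, true}, false) << "\n"; // 1
  std::cout << Evaluate(Op::XOR, {true, true, true},  true)  << "\n";  // 0
  return 0;
}
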
View File

@ -1,4 +1,4 @@
// Copyright 2019 The MediaPipe Authors.
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -12,10 +12,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#import <UIKit/UIKit.h>
#import "mediapipe/examples/ios/common/CommonViewController.h"
@interface MultiHandTrackingViewController : CommonViewController
@end

syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message LogicCalculatorOptions {
extend CalculatorOptions {
optional LogicCalculatorOptions ext = 338731246;
}
// The logical operation to apply.
enum Operation {
AND = 0;
OR = 1;
XOR = 2;
}
optional Operation op = 1;
// Whether to negate the result.
optional bool negate = 2;
// Optional bool input values.
repeated bool input_value = 3;
}

View File

@ -36,9 +36,8 @@ android_binary(
name = "facedetectioncpu",
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/face_detection:mobile_cpu.binarypb",
"//mediapipe/models:face_detection_front.tflite",
"//mediapipe/models:face_detection_front_labelmap.txt",
"//mediapipe/graphs/face_detection:face_detection_mobile_cpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@ -47,7 +46,7 @@ android_binary(
"appName": "Face Detection (CPU)",
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "mobile_cpu.binarypb",
"binaryGraphName": "face_detection_mobile_cpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",

View File

@ -36,9 +36,8 @@ android_binary(
name = "facedetectiongpu",
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/face_detection:mobile_gpu.binarypb",
"//mediapipe/models:face_detection_front.tflite",
"//mediapipe/models:face_detection_front_labelmap.txt",
"//mediapipe/graphs/face_detection:face_detection_mobile_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@ -47,7 +46,7 @@ android_binary(
"appName": "Face Detection",
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "mobile_gpu.binarypb",
"binaryGraphName": "face_detection_mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",

View File

@ -37,8 +37,7 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu.binarypb",
"//mediapipe/models:palm_detection.tflite",
"//mediapipe/models:palm_detection_labelmap.txt",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -37,10 +37,9 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
"//mediapipe/models:handedness.txt",
"//mediapipe/models:hand_landmark.tflite",
"//mediapipe/models:palm_detection.tflite",
"//mediapipe/models:palm_detection_labelmap.txt",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/hand_landmark:hand_landmark.tflite",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -18,76 +18,75 @@ import android.os.Bundle;
import android.util.Log;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.framework.AndroidPacketCreator;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.PacketGetter;
import com.google.protobuf.InvalidProtocolBufferException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** Main activity of MediaPipe hand tracking app. */
public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
private static final String TAG = "MainActivity";
private static final String OUTPUT_HAND_PRESENCE_STREAM_NAME = "hand_presence";
private static final String INPUT_NUM_HANDS_SIDE_PACKET_NAME = "num_hands";
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks";
// Max number of hands to detect/process.
private static final int NUM_HANDS = 2;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
processor.addPacketCallback(
OUTPUT_HAND_PRESENCE_STREAM_NAME,
(packet) -> {
Boolean handPresence = PacketGetter.getBool(packet);
if (!handPresence) {
Log.d(
TAG,
"[TS:" + packet.getTimestamp() + "] Hand presence is false, no hands detected.");
}
});
AndroidPacketCreator packetCreator = processor.getPacketCreator();
Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put(INPUT_NUM_HANDS_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_HANDS));
processor.setInputSidePackets(inputSidePackets);
// To show verbose logging, run:
// adb shell setprop log.tag.MainActivity VERBOSE
if (Log.isLoggable(TAG, Log.VERBOSE)) {
processor.addPacketCallback(
OUTPUT_LANDMARKS_STREAM_NAME,
(packet) -> {
byte[] landmarksRaw = PacketGetter.getProtoBytes(packet);
try {
NormalizedLandmarkList landmarks = NormalizedLandmarkList.parseFrom(landmarksRaw);
if (landmarks == null) {
Log.v(TAG, "[TS:" + packet.getTimestamp() + "] No hand landmarks.");
return;
}
// Note: If hand_presence is false, these landmarks are useless.
Log.v(
TAG,
"[TS:"
+ packet.getTimestamp()
+ "] #Landmarks for hand: "
+ landmarks.getLandmarkCount());
Log.v(TAG, getLandmarksDebugString(landmarks));
} catch (InvalidProtocolBufferException e) {
Log.e(TAG, "Couldn't Exception received - " + e);
return;
}
});
processor.addPacketCallback(
OUTPUT_LANDMARKS_STREAM_NAME,
(packet) -> {
Log.v(TAG, "Received multi-hand landmarks packet.");
List<NormalizedLandmarkList> multiHandLandmarks =
PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
Log.v(
TAG,
"[TS:"
+ packet.getTimestamp()
+ "] "
+ getMultiHandLandmarksDebugString(multiHandLandmarks));
});
}
}
private static String getLandmarksDebugString(NormalizedLandmarkList landmarks) {
int landmarkIndex = 0;
String landmarksString = "";
for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
landmarksString +=
"\t\tLandmark["
+ landmarkIndex
+ "]: ("
+ landmark.getX()
+ ", "
+ landmark.getY()
+ ", "
+ landmark.getZ()
+ ")\n";
++landmarkIndex;
}
return landmarksString;
}
private String getMultiHandLandmarksDebugString(List<NormalizedLandmarkList> multiHandLandmarks) {
if (multiHandLandmarks.isEmpty()) {
return "No hand landmarks";
}
String multiHandLandmarksStr = "Number of hands detected: " + multiHandLandmarks.size() + "\n";
int handIndex = 0;
for (NormalizedLandmarkList landmarks : multiHandLandmarks) {
multiHandLandmarksStr +=
"\t#Hand landmarks for hand[" + handIndex + "]: " + landmarks.getLandmarkCount() + "\n";
int landmarkIndex = 0;
for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
multiHandLandmarksStr +=
"\t\tLandmark ["
+ landmarkIndex
+ "]: ("
+ landmark.getX()
+ ", "
+ landmark.getY()
+ ", "
+ landmark.getZ()
+ ")\n";
++landmarkIndex;
}
++handIndex;
}
return multiHandLandmarksStr;
}
}
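For readers tracing the hand-tracking change above: the landmarks callback now receives a vector of NormalizedLandmarkList protos (one per detected hand) via PacketGetter.getProtoVector, instead of a single serialized NormalizedLandmarkList read with getProtoBytes. The sketch below is not part of this commit; it shows one way a caller might pull a single landmark out of that vector. The HandLandmarkUtil class and the INDEX_FINGER_TIP constant are illustrative assumptions (index 8 is commonly documented as the index fingertip in MediaPipe's 21-point hand model), not APIs added by this change.

```java
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import java.util.List;

/** Illustrative helper, not part of this commit. */
final class HandLandmarkUtil {
  // Assumed position of the index fingertip in the 21-landmark hand model.
  private static final int INDEX_FINGER_TIP = 8;

  private HandLandmarkUtil() {}

  /** Returns the normalized (x, y) of the first hand's index fingertip, or null if unavailable. */
  static float[] firstHandIndexTip(List<NormalizedLandmarkList> multiHandLandmarks) {
    if (multiHandLandmarks.isEmpty()) {
      return null;
    }
    NormalizedLandmarkList hand = multiHandLandmarks.get(0);
    if (hand.getLandmarkCount() <= INDEX_FINGER_TIP) {
      return null;
    }
    NormalizedLandmark tip = hand.getLandmark(INDEX_FINGER_TIP);
    // Landmark coordinates are normalized to [0, 1] relative to the image width and height.
    return new float[] {tip.getX(), tip.getY()};
  }
}
```

Such a helper could be called from inside the getProtoVector callback shown above, after the debug logging.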

View File

@ -1,64 +0,0 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
android_binary(
name = "multihandtrackinggpu",
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/hand_tracking:multi_hand_tracking_mobile_gpu.binarypb",
"//mediapipe/models:handedness.txt",
"//mediapipe/models:hand_landmark.tflite",
"//mediapipe/models:palm_detection.tflite",
"//mediapipe/models:palm_detection_labelmap.txt",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.multihandtrackinggpu",
"appName": "Multi-hand Tracking",
"mainActivity": ".MainActivity",
"cameraFacingFront": "True",
"binaryGraphName": "multi_hand_tracking_mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",
},
multidex = "native",
deps = [
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
],
)

View File

@ -1,80 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.multihandtrackinggpu;
import android.os.Bundle;
import android.util.Log;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.framework.PacketGetter;
import java.util.List;
/** Main activity of MediaPipe multi-hand tracking app. */
public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
private static final String TAG = "MainActivity";
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_hand_landmarks";
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
// To show verbose logging, run:
// adb shell setprop log.tag.MainActivity VERBOSE
if (Log.isLoggable(TAG, Log.VERBOSE)) {
processor.addPacketCallback(
OUTPUT_LANDMARKS_STREAM_NAME,
(packet) -> {
Log.v(TAG, "Received multi-hand landmarks packet.");
List<NormalizedLandmarkList> multiHandLandmarks =
PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser());
Log.v(
TAG,
"[TS:"
+ packet.getTimestamp()
+ "] "
+ getMultiHandLandmarksDebugString(multiHandLandmarks));
});
}
}
private String getMultiHandLandmarksDebugString(List<NormalizedLandmarkList> multiHandLandmarks) {
if (multiHandLandmarks.isEmpty()) {
return "No hand landmarks";
}
String multiHandLandmarksStr = "Number of hands detected: " + multiHandLandmarks.size() + "\n";
int handIndex = 0;
for (NormalizedLandmarkList landmarks : multiHandLandmarks) {
multiHandLandmarksStr +=
"\t#Hand landmarks for hand[" + handIndex + "]: " + landmarks.getLandmarkCount() + "\n";
int landmarkIndex = 0;
for (NormalizedLandmark landmark : landmarks.getLandmarkList()) {
multiHandLandmarksStr +=
"\t\tLandmark ["
+ landmarkIndex
+ "]: ("
+ landmark.getX()
+ ", "
+ landmark.getY()
+ ", "
+ landmark.getZ()
+ ")\n";
++landmarkIndex;
}
++handIndex;
}
return multiHandLandmarksStr;
}
}

View File

@ -1,4 +1,4 @@
# Copyright 2019 The MediaPipe Authors.
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -12,16 +12,64 @@
# See the License for the specific language governing permissions and
# limitations under the License.
load("@bazel_skylib//lib:selects.bzl", "selects")
load(":build_defs.bzl", "generate_manifest_values")
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
config_setting(
name = "use_chair",
define_values = {
"chair": "true",
},
)
config_setting(
name = "use_cup",
define_values = {
"cup": "true",
},
)
config_setting(
name = "use_camera",
define_values = {
"camera": "true",
},
)
config_setting(
name = "use_shoe_1stage",
define_values = {
"shoe_1stage": "true",
},
)
config_setting(
name = "use_chair_1stage",
define_values = {
"chair_1stage": "true",
},
)
selects.config_setting_group(
name = "1stage",
match_any = [
":use_shoe_1stage",
":use_chair_1stage",
],
)
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/object_detection_3d:mobile_calculators",
deps = select({
"//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_calculators"],
":1stage": ["//mediapipe/graphs/object_detection_3d:mobile_calculators_1stage"],
}) + [
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
@ -32,67 +80,108 @@ cc_library(
alwayslink = 1,
)
# To use the "chair" model instead of the default "shoes" model,
# add "--define chair=true" to the bazel build command.
config_setting(
name = "use_chair_model",
define_values = {
"chair": "true",
},
)
genrule(
name = "binary_graph",
srcs = select({
"//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph_shoe"],
":use_chair_model": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph_chair"],
"//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph"],
":1stage": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_1stage_binary_graph"],
}),
outs = ["object_detection_3d.binarypb"],
cmd = "cp $< $@",
)
MODELS_DIR = "//mediapipe/models"
genrule(
name = "model",
srcs = select({
"//conditions:default": ["//mediapipe/models:object_detection_3d_sneakers.tflite"],
":use_chair_model": ["//mediapipe/models:object_detection_3d_chair.tflite"],
"//conditions:default": [MODELS_DIR + ":object_detection_3d_sneakers.tflite"],
":use_chair": [MODELS_DIR + ":object_detection_3d_chair.tflite"],
":use_cup": [MODELS_DIR + ":object_detection_3d_cup.tflite"],
":use_camera": [MODELS_DIR + ":object_detection_3d_camera.tflite"],
":use_shoe_1stage": [MODELS_DIR + ":object_detection_3d_sneakers_1stage.tflite"],
":use_chair_1stage": [MODELS_DIR + ":object_detection_3d_chair_1stage.tflite"],
}),
outs = ["object_detection_3d.tflite"],
cmd = "cp $< $@",
)
MANIFESTS_DIR = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/manifests"
android_library(
name = "manifest_lib",
exports_manifest = 1,
manifest = select({
"//conditions:default": MANIFESTS_DIR + ":AndroidManifestSneaker.xml",
":use_chair": MANIFESTS_DIR + ":AndroidManifestChair.xml",
":use_cup": MANIFESTS_DIR + ":AndroidManifestCup.xml",
":use_camera": MANIFESTS_DIR + ":AndroidManifestCamera.xml",
":use_shoe_1stage": MANIFESTS_DIR + ":AndroidManifestSneaker.xml",
":use_chair_1stage": MANIFESTS_DIR + ":AndroidManifestChair.xml",
}),
deps = [
"//third_party:opencv",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:com_google_guava_guava",
],
)
ASSETS_DIR = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets"
genrule(
name = "mesh",
srcs = select({
"//conditions:default": [ASSETS_DIR + "/sneaker:model.obj.uuu"],
":use_chair": [ASSETS_DIR + "/chair:model.obj.uuu"],
":use_cup": [ASSETS_DIR + "/cup:model.obj.uuu"],
":use_camera": [ASSETS_DIR + "/camera:model.obj.uuu"],
":use_shoe_1stage": [ASSETS_DIR + "/sneaker:model.obj.uuu"],
":use_chair_1stage": [ASSETS_DIR + "/chair:model.obj.uuu"],
}),
outs = ["model.obj.uuu"],
cmd = "cp $< $@",
)
genrule(
name = "texture",
srcs = select({
"//conditions:default": [ASSETS_DIR + "/sneaker:texture.jpg"],
":use_chair": [ASSETS_DIR + "/chair:texture.jpg"],
":use_cup": [ASSETS_DIR + "/cup:texture.jpg"],
":use_camera": [ASSETS_DIR + "/camera:texture.jpg"],
":use_shoe_1stage": [ASSETS_DIR + "/sneaker:texture.jpg"],
":use_chair_1stage": [ASSETS_DIR + "/chair:texture.jpg"],
}),
outs = ["texture.jpg"],
cmd = "cp $< $@",
)
android_binary(
name = "objectdetection3d",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
":model",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets:box.obj.uuu",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets:classic_colors.png",
] + select({
"//conditions:default": [
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:model.obj.uuu",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:texture.jpg",
],
":use_chair_model": [
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:model.obj.uuu",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:texture.jpg",
],
}),
":mesh",
":texture",
MODELS_DIR + ":object_detection_ssd_mobilenetv2_oidv4_fp16.tflite",
MODELS_DIR + ":object_detection_oidv4_labelmap.pbtxt",
ASSETS_DIR + ":box.obj.uuu",
ASSETS_DIR + ":classic_colors.png",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.objectdetection3d",
"appName": "Objectron",
"mainActivity": ".MainActivity",
"cameraFacingFront": "False",
"binaryGraphName": "object_detection_3d.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",
},
manifest_values = select({
"//conditions:default": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_shoe", "Shoe Objectron"),
":use_chair": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_chair", "Chair Objectron"),
":use_cup": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_cup", "Cup Objectron"),
":use_camera": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_camera", "Camera Objectron"),
":use_shoe_1stage": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_shoe_1stage", "Single Stage Shoe Objectron"),
":use_chair_1stage": generate_manifest_values("com.google.mediapipe.apps.objectdetection3d_chair_1stage", "Single Stage Chair Objectron"),
}),
multidex = "native",
deps = [
":manifest_lib",
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
"//mediapipe/framework/formats:landmark_java_proto_lite",

View File

@ -1,4 +1,4 @@
// Copyright 2019 The MediaPipe Authors.
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -14,6 +14,9 @@
package com.google.mediapipe.apps.objectdetection3d;
import android.content.pm.ApplicationInfo;
import android.content.pm.PackageManager;
import android.content.pm.PackageManager.NameNotFoundException;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Bundle;
@ -40,10 +43,25 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
private Bitmap objTexture = null;
private Bitmap boxTexture = null;
// ApplicationInfo for retrieving metadata defined in the manifest.
private ApplicationInfo applicationInfo;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
try {
applicationInfo =
getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
} catch (NameNotFoundException e) {
Log.e(TAG, "Cannot find application info: " + e);
}
String categoryName = applicationInfo.metaData.getString("categoryName");
float[] modelScale = parseFloatArrayFromString(
applicationInfo.metaData.getString("modelScale"));
float[] modelTransform = parseFloatArrayFromString(
applicationInfo.metaData.getString("modelTransformation"));
prepareDemoAssets();
AndroidPacketCreator packetCreator = processor.getPacketCreator();
Map<String, Packet> inputSidePackets = new HashMap<>();
@ -51,6 +69,9 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
inputSidePackets.put("box_asset_name", packetCreator.createString(BOX_FILE));
inputSidePackets.put("obj_texture", packetCreator.createRgbaImageFrame(objTexture));
inputSidePackets.put("box_texture", packetCreator.createRgbaImageFrame(boxTexture));
inputSidePackets.put("allowed_labels", packetCreator.createString(categoryName));
inputSidePackets.put("model_scale", packetCreator.createFloat32Array(modelScale));
inputSidePackets.put("model_transformation", packetCreator.createFloat32Array(modelTransform));
processor.setInputSidePackets(inputSidePackets);
}
@ -134,4 +155,13 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
throw new RuntimeException(e);
}
}
private static float[] parseFloatArrayFromString(String string) {
String[] elements = string.split(",", -1);
float[] array = new float[elements.length];
for (int i = 0; i < elements.length; ++i) {
array[i] = Float.parseFloat(elements[i]);
}
return array;
}
}
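For context on the metadata plumbing in this file: each Objectron flavor's manifest (generated via generate_manifest_values in the BUILD file above) carries a category name plus comma-separated modelScale and modelTransformation strings, which onCreate parses with parseFloatArrayFromString and feeds to the graph as float-array side packets. The standalone sketch below is not part of this commit and uses hypothetical values (a 0.25 uniform scale and a 4x4 identity transform) purely to exercise the same parsing.

```java
/** Standalone sketch, not part of this commit; values are hypothetical. */
final class MetadataParseDemo {
  // Same logic as MainActivity.parseFloatArrayFromString above.
  private static float[] parseFloatArrayFromString(String string) {
    String[] elements = string.split(",", -1);
    float[] array = new float[elements.length];
    for (int i = 0; i < elements.length; ++i) {
      array[i] = Float.parseFloat(elements[i]);
    }
    return array;
  }

  public static void main(String[] args) {
    float[] modelScale = parseFloatArrayFromString("0.25,0.25,0.25");
    float[] modelTransform =
        parseFloatArrayFromString("1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1");
    System.out.println(
        "scale values: " + modelScale.length + ", transform values: " + modelTransform.length);
  }
}
```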

Some files were not shown because too many files have changed in this diff.