Project import generated by Copybara.

GitOrigin-RevId: 73d686c40057684f8bfaca285368bf1813f9fc26
This commit is contained in:
MediaPipe Team 2022-03-21 12:07:37 -07:00 committed by jqtang
parent e6c19885c6
commit cc6a2f7af6
266 changed files with 3658 additions and 1681 deletions

View File

@@ -1 +1 @@
-4.2.1
+5.0.0

View File

@@ -10,5 +10,3 @@ For questions on how to work with MediaPipe, or support for problems that are no
If you are reporting a vulnerability, please use the [dedicated reporting process](https://github.com/google/mediapipe/security).
-For high-level discussions about MediaPipe, please post to discuss@mediapipe.org, for questions about the development or internal workings of MediaPipe, or if you would like to know how to contribute to MediaPipe, please post to developers@mediapipe.org.

View File

@@ -56,7 +56,7 @@ RUN pip3 install tf_slim
RUN ln -s /usr/bin/python3 /usr/bin/python

# Install bazel
-ARG BAZEL_VERSION=4.2.1
+ARG BAZEL_VERSION=5.0.0
RUN mkdir /bazel && \
    wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \

View File

@@ -136,8 +136,8 @@ run code search using
## Community

-*   [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome
-    MediaPipe related frameworks, libraries and software
+*   [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
+    curated list of awesome MediaPipe related frameworks, libraries and software
*   [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
*   [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
    community discussion around MediaPipe

View File

@@ -61,11 +61,12 @@ http_archive(
    sha256 = "de682ea824bfffba05b4e33b67431c247397d6175962534305136aa06f92e049",
)

-# Google Benchmark library.
+# Google Benchmark library v1.6.1 released on 2022-01-10.
http_archive(
    name = "com_google_benchmark",
-    urls = ["https://github.com/google/benchmark/archive/main.zip"],
-    strip_prefix = "benchmark-main",
+    urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.6.1.tar.gz"],
+    strip_prefix = "benchmark-1.6.1",
+    sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4",
    build_file = "@//third_party:benchmark.BUILD",
)

@@ -373,9 +374,9 @@ http_archive(
)

# Tensorflow repo should always go after the other external dependencies.
-# 2021-12-02
-_TENSORFLOW_GIT_COMMIT = "18a1dc0ba806dc023808531f0373d9ec068e64bf"
-_TENSORFLOW_SHA256 = "85b90416f7a11339327777bccd634de00ca0de2cf334f5f0727edcb11ff9289a"
+# 2022-02-15
+_TENSORFLOW_GIT_COMMIT = "a3419acc751dfc19caf4d34a1594e1f76810ec58"
+_TENSORFLOW_SHA256 = "b95b2a83632d4055742ae1a2dcc96b45da6c12a339462dbc76c8bca505308e3a"
http_archive(
    name = "org_tensorflow",
    urls = [
@@ -383,7 +384,6 @@ http_archive(
    ],
    patches = [
        "@//third_party:org_tensorflow_compatibility_fixes.diff",
-        "@//third_party:org_tensorflow_objc_cxx17.diff",
        # Diff is generated with a script, don't update it manually.
        "@//third_party:org_tensorflow_custom_ops.diff",
    ],

View File

@@ -109,7 +109,7 @@ for app in ${apps}; do
      if [[ ${category} != "shoe" ]]; then
        bazel_flags_extended+=(--define ${category}=true)
      fi
-      bazel "${bazel_flags_extended[@]}"
+      bazelisk "${bazel_flags_extended[@]}"
      cp -f "${bin}" "${apk}"
    fi
    apks+=(${apk})
@@ -120,7 +120,7 @@ for app in ${apps}; do
    if [[ ${app_name} == "templatematchingcpu" ]]; then
      switch_to_opencv_4
    fi
-    bazel "${bazel_flags[@]}"
+    bazelisk "${bazel_flags[@]}"
    cp -f "${bin}" "${apk}"
    if [[ ${app_name} == "templatematchingcpu" ]]; then
      switch_to_opencv_3

View File

@@ -83,7 +83,7 @@ for app in ${apps}; do
    bazel_flags=("${default_bazel_flags[@]}")
    bazel_flags+=(${target})
-    bazel "${bazel_flags[@]}"
+    bazelisk "${bazel_flags[@]}"
    cp -f "${bin_dir}/${app}/"*"_cpu" "${out_dir}"
  fi
  if [[ $build_only == false ]]; then

View File

@@ -71,7 +71,7 @@ for app in ${apps}; do
      bazel_flags+=(--linkopt=-s)
    fi
-    bazel "${bazel_flags[@]}"
+    bazelisk "${bazel_flags[@]}"
    cp -f "${bin_dir}/${app}/"*".ipa" "${out_dir}"
  fi
done

View File

@@ -169,7 +169,7 @@ behavior depending on resource constraints.
[`CalculatorBase`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator_base.h
[`DefaultInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/default_input_stream_handler.h
-[`SyncSetInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/sync_set_input_stream_handler.h
-[`ImmediateInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/immediate_input_stream_handler.h
+[`SyncSetInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc
+[`ImmediateInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc
[`CalculatorGraphConfig::max_queue_size`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator.proto
[`FlowLimiterCalculator`]: https://github.com/google/mediapipe/tree/master/mediapipe/calculators/core/flow_limiter_calculator.cc

View File

@@ -30,7 +30,7 @@ APIs (currently in alpha) that are now available in
*   Install MediaPipe following these [instructions](./install.md).
*   Setup Java Runtime.
*   Setup Android SDK release 30.0.0 and above.
-*   Setup Android NDK version 18 and above.
+*   Setup Android NDK version between 18 and 21.

MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see
below for Android Studio setup). However, if you prefer using MediaPipe without

View File

@@ -48,6 +48,16 @@ each project.
    bazel build -c opt --strip=ALWAYS \
        --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
        --fat_apk_cpu=arm64-v8a,armeabi-v7a \
+        --legacy_whole_archive=0 \
+        --features=-legacy_whole_archive \
+        --copt=-fvisibility=hidden \
+        --copt=-ffunction-sections \
+        --copt=-fdata-sections \
+        --copt=-fstack-protector \
+        --copt=-Oz \
+        --copt=-fomit-frame-pointer \
+        --copt=-DABSL_MIN_LOG_LEVEL=2 \
+        --linkopt=-Wl,--gc-sections,--strip-all \
        //path/to/the/aar/build/file:aar_name.aar
    ```

@@ -57,6 +67,16 @@ each project.
    bazel build -c opt --strip=ALWAYS \
        --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
        --fat_apk_cpu=arm64-v8a,armeabi-v7a \
+        --legacy_whole_archive=0 \
+        --features=-legacy_whole_archive \
+        --copt=-fvisibility=hidden \
+        --copt=-ffunction-sections \
+        --copt=-fdata-sections \
+        --copt=-fstack-protector \
+        --copt=-Oz \
+        --copt=-fomit-frame-pointer \
+        --copt=-DABSL_MIN_LOG_LEVEL=2 \
+        --linkopt=-Wl,--gc-sections,--strip-all \
        //mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mediapipe_face_detection.aar

    # It should print:

View File

@@ -569,7 +569,7 @@ next section.
Option 1. Follow
[the official Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html)
-to install Bazel 4.2.1 or higher.
+to install Bazel 5.0.0 or higher.

Option 2. Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-bazelisk.html)

View File

@@ -126,6 +126,7 @@ following steps:
      }
      return packet.Get<MyType>();
    });
+}

}  // namespace mediapipe
```

View File

@@ -136,8 +136,8 @@ run code search using
## Community

-*   [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome
-    MediaPipe related frameworks, libraries and software
+*   [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
+    curated list of awesome MediaPipe related frameworks, libraries and software
*   [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
*   [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
    community discussion around MediaPipe

View File

@@ -26,7 +26,7 @@ MediaPipe Face Detection is an ultrafast face detection solution that comes with
face detector tailored for mobile GPU inference. The detector's super-realtime
performance enables it to be applied to any live viewfinder experience that
requires an accurate facial region of interest as an input for other
-task-specific models, such as 3D facial keypoint or geometry estimation (e.g.,
+task-specific models, such as 3D facial keypoint estimation (e.g.,
[MediaPipe Face Mesh](./face_mesh.md)), facial features or expression
classification, and face region segmentation. BlazeFace uses a lightweight
feature extraction network inspired by, but distinct from

View File

@@ -20,34 +20,34 @@ nav_order: 2
## Overview

-MediaPipe Face Mesh is a face geometry solution that estimates 468 3D face
-landmarks in real-time even on mobile devices. It employs machine learning (ML)
-to infer the 3D surface geometry, requiring only a single camera input without
-the need for a dedicated depth sensor. Utilizing lightweight model architectures
-together with GPU acceleration throughout the pipeline, the solution delivers
-real-time performance critical for live experiences.
+MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
+real-time even on mobile devices. It employs machine learning (ML) to infer the
+3D facial surface, requiring only a single camera input without the need for a
+dedicated depth sensor. Utilizing lightweight model architectures together with
+GPU acceleration throughout the pipeline, the solution delivers real-time
+performance critical for live experiences.

-Additionally, the solution is bundled with the Face Geometry module that bridges
-the gap between the face landmark estimation and useful real-time augmented
-reality (AR) applications. It establishes a metric 3D space and uses the face
-landmark screen positions to estimate face geometry within that space. The face
-geometry data consists of common 3D geometry primitives, including a face pose
-transformation matrix and a triangular face mesh. Under the hood, a lightweight
-statistical analysis method called
+Additionally, the solution is bundled with the Face Transform module that
+bridges the gap between the face landmark estimation and useful real-time
+augmented reality (AR) applications. It establishes a metric 3D space and uses
+the face landmark screen positions to estimate a face transform within that
+space. The face transform data consists of common 3D primitives, including a
+face pose transformation matrix and a triangular face mesh. Under the hood, a
+lightweight statistical analysis method called
[Procrustes Analysis](https://en.wikipedia.org/wiki/Procrustes_analysis) is
employed to drive a robust, performant and portable logic. The analysis runs on
CPU and has a minimal speed/memory footprint on top of the ML model inference.

![face_mesh_ar_effects.gif](../images/face_mesh_ar_effects.gif) |
:-------------------------------------------------------------: |
-*Fig 1. AR effects utilizing facial surface geometry.* |
+*Fig 1. AR effects utilizing the 3D facial surface.* |

## ML Pipeline

Our ML pipeline consists of two real-time deep neural network models that work
together: A detector that operates on the full image and computes face locations
and a 3D face landmark model that operates on those locations and predicts the
-approximate surface geometry via regression. Having the face accurately cropped
+approximate 3D surface via regression. Having the face accurately cropped
drastically reduces the need for common data augmentations like affine
transformations consisting of rotations, translation and scale changes. Instead
it allows the network to dedicate most of its capacity towards coordinate
@@ -55,8 +55,8 @@ prediction accuracy. In addition, in our pipeline the crops can also be
generated based on the face landmarks identified in the previous frame, and only
when the landmark model could no longer identify face presence is the face
detector invoked to relocalize the face. This strategy is similar to that
-employed in our [MediaPipe Hands](./hands.md) solution, which uses a palm detector
-together with a hand landmark model.
+employed in our [MediaPipe Hands](./hands.md) solution, which uses a palm
+detector together with a hand landmark model.

The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt)
@@ -128,7 +128,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
:---------------------------------------------------------------------------: |
*Fig 3. Attention Mesh: Overview of model architecture.* |

-## Face Geometry Module
+## Face Transform Module

The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
detection in the screen coordinate space: the X- and Y- coordinates are
@@ -140,7 +140,7 @@ enable the full spectrum of augmented reality (AR) features like aligning a
virtual 3D object with a detected face.

The
-[Face Geometry module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
+[Face Transform module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
moves away from the screen coordinate space towards a metric 3D space and
provides necessary primitives to handle a detected face as a regular 3D object.
By design, you'll be able to use a perspective camera to project the final 3D
@@ -151,7 +151,7 @@ landmark positions are not changed.
#### Metric 3D Space

-The **Metric 3D space** established within the Face Geometry module is a
+The **Metric 3D space** established within the Face Transform module is a
right-handed orthonormal metric 3D coordinate space. Within the space, there is
a **virtual perspective camera** located at the space origin and pointed in the
negative direction of the Z-axis. In the current pipeline, it is assumed that
@@ -184,11 +184,11 @@ functions:
### Components

-#### Geometry Pipeline
+#### Transform Pipeline

-The **Geometry Pipeline** is a key component, which is responsible for
-estimating face geometry objects within the Metric 3D space. On each frame, the
-following steps are executed in the given order:
+The **Transform Pipeline** is a key component, which is responsible for
+estimating the face transform objects within the Metric 3D space. On each frame,
+the following steps are executed in the given order:

-   Face landmark screen coordinates are converted into the Metric 3D space
    coordinates;
@@ -199,12 +199,12 @@ following steps are executed in the given order:
    positions (XYZ), while both the vertex texture coordinates (UV) and the
    triangular topology are inherited from the canonical face model.

-The geometry pipeline is implemented as a MediaPipe
+The transform pipeline is implemented as a MediaPipe
[calculator](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc).
-For your convenience, the face geometry pipeline calculator is bundled together
-with corresponding metadata into a unified MediaPipe
+For your convenience, this calculator is bundled together with corresponding
+metadata into a unified MediaPipe
[subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt).
-The face geometry format is defined as a Protocol Buffer
+The face transform format is defined as a Protocol Buffer
[message](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto).

#### Effect Renderer
@@ -227,7 +227,7 @@ The effect renderer is implemented as a MediaPipe
| ![face_geometry_renderer.gif](../images/face_geometry_renderer.gif) |
| :---------------------------------------------------------------------: |
-| *Fig 5. An example of face effects rendered by the Face Geometry Effect Renderer.* |
+| *Fig 5. An example of face effects rendered by the Face Transform Effect Renderer.* |

## Solution APIs

View File

@@ -116,7 +116,7 @@ on how to build MediaPipe examples.
Note: The following runs TensorFlow inference on CPU. If you would like to
run inference on GPU (Linux only), please follow
-[TensorFlow CUDA Support and Setup on Linux Desktop](gpu.md#tensorflow-cuda-support-and-setup-on-linux-desktop)
+[TensorFlow CUDA Support and Setup on Linux Desktop](../getting_started/gpu_support.md#tensorflow-cuda-support-and-setup-on-linux-desktop)
instead.

To build the TensorFlow CPU inference example on desktop, run:

View File

@@ -384,7 +384,7 @@ Supported configuration options:
  <meta charset="utf-8">
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
-  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils_3d/control_utils_3d.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/objectron/objectron.js" crossorigin="anonymous"></script>
</head>

View File

@@ -359,7 +359,7 @@ Supported configuration options:
  <meta charset="utf-8">
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
-  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils_3d/control_utils_3d.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/pose/pose.js" crossorigin="anonymous"></script>
</head>

View File

@@ -117,6 +117,7 @@ mediapipe_proto_library(
        "//mediapipe/framework:calculator_options_proto",
        "//mediapipe/framework:calculator_proto",
        "//mediapipe/framework/formats:classification_proto",
+        "//mediapipe/framework/formats:landmark_proto",
    ],
)
@@ -309,8 +310,8 @@ cc_library(
)

cc_library(
-    name = "concatenate_normalized_landmark_list_calculator",
-    srcs = ["concatenate_normalized_landmark_list_calculator.cc"],
+    name = "concatenate_proto_list_calculator",
+    srcs = ["concatenate_proto_list_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":concatenate_vector_calculator_cc_proto",
@@ -324,10 +325,10 @@ cc_library(
)

cc_test(
-    name = "concatenate_normalized_landmark_list_calculator_test",
-    srcs = ["concatenate_normalized_landmark_list_calculator_test.cc"],
+    name = "concatenate_proto_list_calculator_test",
+    srcs = ["concatenate_proto_list_calculator_test.cc"],
    deps = [
-        ":concatenate_normalized_landmark_list_calculator",
+        ":concatenate_proto_list_calculator",
        ":concatenate_vector_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
@@ -964,8 +965,8 @@ cc_test(
)

cc_library(
-    name = "split_landmarks_calculator",
-    srcs = ["split_landmarks_calculator.cc"],
+    name = "split_proto_list_calculator",
+    srcs = ["split_proto_list_calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        ":split_vector_calculator_cc_proto",
@@ -979,10 +980,10 @@ cc_library(
)

cc_test(
-    name = "split_landmarks_calculator_test",
-    srcs = ["split_landmarks_calculator_test.cc"],
+    name = "split_proto_list_calculator_test",
+    srcs = ["split_proto_list_calculator_test.cc"],
    deps = [
-        ":split_landmarks_calculator",
+        ":split_proto_list_calculator",
        ":split_vector_calculator_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
@@ -1195,6 +1196,7 @@ cc_library(
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:collection_item_id",
        "//mediapipe/framework/formats:classification_cc_proto",
+        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",

View File

@@ -1,79 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace api2 {
// Concatenates several NormalizedLandmarkList protos following stream index
// order. This class assumes that every input stream contains a
// NormalizedLandmarkList proto object.
class ConcatenateNormalizedLandmarkListCalculator : public Node {
public:
static constexpr Input<NormalizedLandmarkList>::Multiple kIn{""};
static constexpr Output<NormalizedLandmarkList> kOut{""};
MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
RET_CHECK_GE(kIn(cc).Count(), 1);
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
only_emit_if_all_present_ =
cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
.only_emit_if_all_present();
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
if (only_emit_if_all_present_) {
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) return absl::OkStatus();
}
}
NormalizedLandmarkList output;
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) continue;
const NormalizedLandmarkList& list = *input;
for (int j = 0; j < list.landmark_size(); ++j) {
*output.add_landmark() = list.landmark(j);
}
}
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
private:
bool only_emit_if_all_present_;
};
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
} // namespace api2
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_

View File

@@ -0,0 +1,118 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace api2 {
// Concatenate several input packets of ListType with a repeated field of
// ItemType into a single output packet of ListType following stream index
// order.
template <typename ItemType, typename ListType>
class ConcatenateListsCalculator : public Node {
public:
static constexpr typename Input<ListType>::Multiple kIn{""};
static constexpr Output<ListType> kOut{""};
MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
RET_CHECK_GE(kIn(cc).Count(), 1);
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
only_emit_if_all_present_ =
cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
.only_emit_if_all_present();
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
if (only_emit_if_all_present_) {
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) return absl::OkStatus();
}
}
ListType output;
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) continue;
const ListType& list = *input;
for (int j = 0; j < ListSize(list); ++j) {
*AddItem(output) = GetItem(list, j);
}
}
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
protected:
virtual int ListSize(const ListType& list) const = 0;
virtual const ItemType GetItem(const ListType& list, int idx) const = 0;
virtual ItemType* AddItem(ListType& list) const = 0;
private:
bool only_emit_if_all_present_;
};
// TODO: Move calculators to separate *.cc files
class ConcatenateNormalizedLandmarkListCalculator
: public ConcatenateListsCalculator<NormalizedLandmark,
NormalizedLandmarkList> {
protected:
int ListSize(const NormalizedLandmarkList& list) const override {
return list.landmark_size();
}
const NormalizedLandmark GetItem(const NormalizedLandmarkList& list,
int idx) const override {
return list.landmark(idx);
}
NormalizedLandmark* AddItem(NormalizedLandmarkList& list) const override {
return list.add_landmark();
}
};
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
class ConcatenateLandmarkListCalculator
: public ConcatenateListsCalculator<Landmark, LandmarkList> {
protected:
int ListSize(const LandmarkList& list) const override {
return list.landmark_size();
}
const Landmark GetItem(const LandmarkList& list, int idx) const override {
return list.landmark(idx);
}
Landmark* AddItem(LandmarkList& list) const override {
return list.add_landmark();
}
};
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkListCalculator);
} // namespace api2
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_
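
The two registered calculators above only cover landmark lists, but the template is written so that any proto with a single repeated field can be concatenated the same way by overriding the three accessors. As a rough illustration (not part of this commit; the ClassificationList variant shown here is hypothetical), plugging in another list type would look like this:

```cpp
// Hypothetical sketch: concatenating ClassificationList protos with the same
// ConcatenateListsCalculator template introduced above. Assumes
// classification.pb.h defines Classification/ClassificationList with a
// repeated `classification` field, mirroring the landmark case.
#include "mediapipe/framework/formats/classification.pb.h"

namespace mediapipe {
namespace api2 {

class ConcatenateClassificationListCalculator
    : public ConcatenateListsCalculator<Classification, ClassificationList> {
 protected:
  int ListSize(const ClassificationList& list) const override {
    return list.classification_size();
  }
  const Classification GetItem(const ClassificationList& list,
                               int idx) const override {
    return list.classification(idx);
  }
  Classification* AddItem(ClassificationList& list) const override {
    return list.add_classification();
  }
};
MEDIAPIPE_REGISTER_NODE(ConcatenateClassificationListCalculator);

}  // namespace api2
}  // namespace mediapipe
```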

View File

@@ -18,6 +18,7 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/formats/classification.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/ret_check.h"
@@ -79,6 +80,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
      packet.Set<uint64>();
    } else if (packet_options.has_classification_list_value()) {
      packet.Set<ClassificationList>();
+    } else if (packet_options.has_landmark_list_value()) {
+      packet.Set<LandmarkList>();
    } else {
      return absl::InvalidArgumentError(
          "None of supported values were specified in options.");
@@ -108,6 +111,9 @@ class ConstantSidePacketCalculator : public CalculatorBase {
    } else if (packet_options.has_classification_list_value()) {
      packet.Set(MakePacket<ClassificationList>(
          packet_options.classification_list_value()));
+    } else if (packet_options.has_landmark_list_value()) {
+      packet.Set(
+          MakePacket<LandmarkList>(packet_options.landmark_list_value()));
    } else {
      return absl::InvalidArgumentError(
          "None of supported values were specified in options.");

View File

@@ -18,6 +18,7 @@ package mediapipe;

import "mediapipe/framework/calculator.proto";
import "mediapipe/framework/formats/classification.proto";
+import "mediapipe/framework/formats/landmark.proto";

option objc_class_prefix = "MediaPipe";
@@ -34,6 +35,7 @@ message ConstantSidePacketCalculatorOptions {
    string string_value = 4;
    uint64 uint64_value = 5;
    ClassificationList classification_list_value = 6;
+    LandmarkList landmark_list_value = 7;
  }
}
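
To make the new option concrete, a minimal sketch of a node that publishes a constant LandmarkList side packet might look like the following. This is illustrative only: the function, stream, and packet names are made up, and the option layout is assumed from the proto change above.

```cpp
// Sketch (assumptions noted above): emit a fixed LandmarkList as a side packet
// via ConstantSidePacketCalculator's new landmark_list_value field.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

CalculatorGraphConfig MakeConstantLandmarkGraph() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    node {
      calculator: "ConstantSidePacketCalculator"
      output_side_packet: "PACKET:fixed_landmarks"
      options {
        [mediapipe.ConstantSidePacketCalculatorOptions.ext] {
          packet {
            landmark_list_value {
              landmark { x: 0.0 y: 0.0 z: 0.0 }
              landmark { x: 1.0 y: 1.0 z: 0.0 }
            }
          }
        }
      }
    }
  )pb");
}

}  // namespace mediapipe
```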

View File

@@ -29,6 +29,11 @@ namespace api2 {
// This calculator periodically copies the GraphProfile from
// mediapipe::GraphProfiler::CaptureProfile to the "PROFILE" output stream.
//
+// Similarly to the log files saved by GraphProfiler::WriteProfile when trace
+// logging is enabled, the first captured profile contains the full
+// canonicalized graph config and, if tracing is enabled, calculator names in
+// graph traces. Subsequent profiles omit this information.
+//
// Example config:
// node {
//   calculator: "GraphProfileCalculator"
@@ -50,11 +55,14 @@ class GraphProfileCalculator : public Node {
  absl::Status Process(CalculatorContext* cc) final {
    auto options = cc->Options<::mediapipe::GraphProfileCalculatorOptions>();

-    if (prev_profile_ts_ == Timestamp::Unset() ||
+    bool first_profile = prev_profile_ts_ == Timestamp::Unset();
+    if (first_profile ||
        cc->InputTimestamp() - prev_profile_ts_ >= options.profile_interval()) {
      prev_profile_ts_ = cc->InputTimestamp();
      GraphProfile result;
-      MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(&result));
+      MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(
+          &result, first_profile ? PopulateGraphConfig::kFull
+                                 : PopulateGraphConfig::kNo));
      kProfileOut(cc).Send(result);
    }
    return absl::OkStatus();
View File

@@ -202,6 +202,8 @@ TEST_F(GraphProfileCalculatorTest, GraphProfile) {
        }
      })pb");

+  ASSERT_EQ(output_packets.size(), 2);
+  EXPECT_TRUE(output_packets[0].Get<GraphProfile>().has_config());
  EXPECT_THAT(output_packets[1].Get<GraphProfile>(),
              mediapipe::EqualsProto(expected_profile));
}

View File

@@ -23,8 +23,8 @@
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/status.h"

-// Quantizes a vector of floats to a std::string so that each float becomes a
-// byte in the [0, 255] range. Any value above max_quantized_value or below
+// Quantizes a vector of floats to a string so that each float becomes a byte
+// in the [0, 255] range. Any value above max_quantized_value or below
// min_quantized_value will be saturated to '/xFF' or '/0'.
//
// Example config:

View File

@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

-#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_  // NOLINT
-#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_  // NOLINT
+#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_  // NOLINT
+#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_  // NOLINT

#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
@@ -24,30 +24,30 @@
namespace mediapipe {

-// Splits an input packet with LandmarkListType into
-// multiple LandmarkListType output packets using the [begin, end) ranges
+// Splits an input packet of ListType with a repeated field of ItemType
+// into multiple ListType output packets using the [begin, end) ranges
// specified in SplitVectorCalculatorOptions. If the option "element_only" is
// set to true, all ranges should be of size 1 and all outputs will be elements
-// of type LandmarkType. If "element_only" is false, ranges can be
-// non-zero in size and all outputs will be of type LandmarkListType.
+// of type ItemType. If "element_only" is false, ranges can be
+// non-zero in size and all outputs will be of type ListType.
// If the option "combine_outputs" is set to true, only one output stream can be
// specified and all ranges of elements will be combined into one
-// LandmarkListType.
-template <typename LandmarkType, typename LandmarkListType>
-class SplitLandmarksCalculator : public CalculatorBase {
+// ListType.
+template <typename ItemType, typename ListType>
+class SplitListsCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    RET_CHECK(cc->Inputs().NumEntries() == 1);
    RET_CHECK(cc->Outputs().NumEntries() != 0);

-    cc->Inputs().Index(0).Set<LandmarkListType>();
+    cc->Inputs().Index(0).Set<ListType>();

    const auto& options =
        cc->Options<::mediapipe::SplitVectorCalculatorOptions>();

    if (options.combine_outputs()) {
      RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
-      cc->Outputs().Index(0).Set<LandmarkListType>();
+      cc->Outputs().Index(0).Set<ListType>();
      for (int i = 0; i < options.ranges_size() - 1; ++i) {
        for (int j = i + 1; j < options.ranges_size(); ++j) {
          const auto& range_0 = options.ranges(i);
@@ -82,9 +82,9 @@ class SplitLandmarksCalculator : public CalculatorBase {
          return absl::InvalidArgumentError(
              "Since element_only is true, all ranges should be of size 1.");
        }
-        cc->Outputs().Index(i).Set<LandmarkType>();
+        cc->Outputs().Index(i).Set<ItemType>();
      } else {
-        cc->Outputs().Index(i).Set<LandmarkListType>();
+        cc->Outputs().Index(i).Set<ListType>();
      }
    }
  }
@@ -111,39 +111,38 @@ class SplitLandmarksCalculator : public CalculatorBase {
  }

  absl::Status Process(CalculatorContext* cc) override {
-    const LandmarkListType& input =
-        cc->Inputs().Index(0).Get<LandmarkListType>();
-    RET_CHECK_GE(input.landmark_size(), max_range_end_)
-        << "Max range end " << max_range_end_ << " exceeds landmarks size "
-        << input.landmark_size();
+    const ListType& input = cc->Inputs().Index(0).Get<ListType>();
+    RET_CHECK_GE(ListSize(input), max_range_end_)
+        << "Max range end " << max_range_end_ << " exceeds list size "
+        << ListSize(input);

    if (combine_outputs_) {
-      LandmarkListType output;
+      ListType output;
      for (int i = 0; i < ranges_.size(); ++i) {
        for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
-          const LandmarkType& input_landmark = input.landmark(j);
-          *output.add_landmark() = input_landmark;
+          const ItemType& input_item = GetItem(input, j);
+          *AddItem(output) = input_item;
        }
      }
-      RET_CHECK_EQ(output.landmark_size(), total_elements_);
+      RET_CHECK_EQ(ListSize(output), total_elements_);
      cc->Outputs().Index(0).AddPacket(
-          MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
+          MakePacket<ListType>(output).At(cc->InputTimestamp()));
    } else {
      if (element_only_) {
        for (int i = 0; i < ranges_.size(); ++i) {
          cc->Outputs().Index(i).AddPacket(
-              MakePacket<LandmarkType>(input.landmark(ranges_[i].first))
+              MakePacket<ItemType>(GetItem(input, ranges_[i].first))
                  .At(cc->InputTimestamp()));
        }
      } else {
        for (int i = 0; i < ranges_.size(); ++i) {
-          LandmarkListType output;
+          ListType output;
          for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
-            const LandmarkType& input_landmark = input.landmark(j);
-            *output.add_landmark() = input_landmark;
+            const ItemType& input_item = GetItem(input, j);
+            *AddItem(output) = input_item;
          }
          cc->Outputs().Index(i).AddPacket(
-              MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
+              MakePacket<ListType>(output).At(cc->InputTimestamp()));
        }
      }
    }
@@ -151,6 +150,11 @@ class SplitLandmarksCalculator : public CalculatorBase {
    return absl::OkStatus();
  }

+ protected:
+  virtual int ListSize(const ListType& list) const = 0;
+  virtual const ItemType GetItem(const ListType& list, int idx) const = 0;
+  virtual ItemType* AddItem(ListType& list) const = 0;
+
 private:
  std::vector<std::pair<int32, int32>> ranges_;
  int32 max_range_end_ = -1;
@@ -159,15 +163,40 @@ class SplitLandmarksCalculator : public CalculatorBase {
  bool combine_outputs_ = false;
};

-typedef SplitLandmarksCalculator<NormalizedLandmark, NormalizedLandmarkList>
-    SplitNormalizedLandmarkListCalculator;
+// TODO: Move calculators to separate *.cc files
+
+class SplitNormalizedLandmarkListCalculator
+    : public SplitListsCalculator<NormalizedLandmark, NormalizedLandmarkList> {
+ protected:
+  int ListSize(const NormalizedLandmarkList& list) const override {
+    return list.landmark_size();
+  }
+  const NormalizedLandmark GetItem(const NormalizedLandmarkList& list,
+                                   int idx) const override {
+    return list.landmark(idx);
+  }
+  NormalizedLandmark* AddItem(NormalizedLandmarkList& list) const override {
+    return list.add_landmark();
+  }
+};
REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);

-typedef SplitLandmarksCalculator<Landmark, LandmarkList>
-    SplitLandmarkListCalculator;
+class SplitLandmarkListCalculator
+    : public SplitListsCalculator<Landmark, LandmarkList> {
+ protected:
+  int ListSize(const LandmarkList& list) const override {
+    return list.landmark_size();
+  }
+  const Landmark GetItem(const LandmarkList& list, int idx) const override {
+    return list.landmark(idx);
+  }
+  Landmark* AddItem(LandmarkList& list) const override {
+    return list.add_landmark();
+  }
+};
REGISTER_CALCULATOR(SplitLandmarkListCalculator);

}  // namespace mediapipe

// NOLINTNEXTLINE
-#endif  // MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_
+#endif  // MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_
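
The rename does not change how the registered calculators are configured: they still read SplitVectorCalculatorOptions, as the header above shows. As a rough, hypothetical sketch (stream names and range values are made up, not taken from this commit), splitting a NormalizedLandmarkList into two sub-lists could look like this:

```cpp
// Illustrative sketch: configure SplitNormalizedLandmarkListCalculator to cut
// one landmark list into two ranges. The options layout follows
// SplitVectorCalculatorOptions referenced in the header above.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

CalculatorGraphConfig MakeSplitLandmarksGraph() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "face_landmarks"
    node {
      calculator: "SplitNormalizedLandmarkListCalculator"
      input_stream: "face_landmarks"
      output_stream: "lips_landmarks"
      output_stream: "left_eye_landmarks"
      options {
        [mediapipe.SplitVectorCalculatorOptions.ext] {
          ranges { begin: 0 end: 40 }
          ranges { begin: 40 end: 56 }
        }
      }
    }
  )pb");
}

}  // namespace mediapipe
```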

View File

@@ -24,7 +24,7 @@
namespace mediapipe {

-// Calculator that converts a std::string into an integer type, or fails if the
+// Calculator that converts a string into an integer type, or fails if the
// conversion is not possible.
//
// Example config:
@@ -47,7 +47,7 @@ class StringToIntCalculatorTemplate : public CalculatorBase {
    if (!absl::SimpleAtoi(cc->InputSidePackets().Index(0).Get<std::string>(),
                          &number)) {
      return absl::InvalidArgumentError(
-          "The std::string could not be parsed as an integer.");
+          "The string could not be parsed as an integer.");
    }
    cc->OutputSidePackets().Index(0).Set(MakePacket<IntType>(number));
    return absl::OkStatus();

View File

@@ -239,10 +239,13 @@ cc_library(
    visibility = ["//visibility:public"],
    deps = [
        ":image_transformation_calculator_cc_proto",
+        "//mediapipe/framework:packet",
+        "//mediapipe/framework:timestamp",
        "//mediapipe/gpu:scale_mode_cc_proto",
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:video_stream_header",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:ret_check",

View File

@@ -105,7 +105,7 @@ absl::StatusOr<ImageFileProperties> GetImageFileProperites(
}  // namespace

// Calculator to extract EXIF information from an image file. The input is
-// a std::string containing raw byte data from a file, and the output is an
+// a string containing raw byte data from a file, and the output is an
// ImageFileProperties proto object with the relevant fields filled in.
// The calculator accepts the input as a stream or a side packet, and can output
// the result as a stream or a side packet. The calculator checks that if an

View File

@@ -16,10 +16,13 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/video_stream_header.h"
+#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/timestamp.h"
#include "mediapipe/gpu/scale_mode.pb.h"

#if !MEDIAPIPE_DISABLE_GPU
@@ -52,6 +55,7 @@ namespace mediapipe {
namespace {

constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
+constexpr char kVideoPrestreamTag[] = "VIDEO_PRESTREAM";

int RotationModeToDegrees(mediapipe::RotationMode_Mode rotation) {
  switch (rotation) {
@@ -122,6 +126,12 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
//   provided, it overrides the FLIP_VERTICALLY input side packet and/or
//   corresponding field in the calculator options.
//
+//   VIDEO_PRESTREAM (optional): VideoHeader for the input ImageFrames, if
+//   rotating or scaling the frames, the header width and height will be updated
+//   appropriately. Note the header is updated only based on dimensions and
+//   rotations specified as side packets or options, input_stream
+//   transformations will not update the header.
+//
// Output:
//   One of the following tags:
//   IMAGE - ImageFrame representing the output image.
@@ -242,6 +252,21 @@ absl::Status ImageTransformationCalculator::GetContract(
    cc->Inputs().Tag("FLIP_VERTICALLY").Set<bool>();
  }

+  RET_CHECK(cc->Inputs().HasTag(kVideoPrestreamTag) ==
+            cc->Outputs().HasTag(kVideoPrestreamTag))
+      << "If VIDEO_PRESTREAM is provided, it must be provided both as an "
+         "inputs and output stream.";
+  if (cc->Inputs().HasTag(kVideoPrestreamTag)) {
+    RET_CHECK(!(cc->Inputs().HasTag("OUTPUT_DIMENSIONS") ||
+                cc->Inputs().HasTag("ROTATION_DEGREES")))
+        << "If specifying VIDEO_PRESTREAM, the transformations that affect the "
+           "dimensions of the frames (OUTPUT_DIMENSIONS and ROTATION_DEGREES) "
+           "need to be constant for every frame, meaning they can only be "
+           "provided in the calculator options or side packets.";
+    cc->Inputs().Tag(kVideoPrestreamTag).Set<mediapipe::VideoHeader>();
+    cc->Outputs().Tag(kVideoPrestreamTag).Set<mediapipe::VideoHeader>();
+  }
+
  if (cc->InputSidePackets().HasTag("OUTPUT_DIMENSIONS")) {
    cc->InputSidePackets().Tag("OUTPUT_DIMENSIONS").Set<DimensionsPacketType>();
  }
@@ -326,6 +351,24 @@ absl::Status ImageTransformationCalculator::Open(CalculatorContext* cc) {
}

absl::Status ImageTransformationCalculator::Process(CalculatorContext* cc) {
+  // First update the video header if it is given, based on the rotation and
+  // dimensions specified as side packets or options. This will only be done
+  // once, so streaming transformation changes will not be reflected in
+  // the header.
+  if (cc->Inputs().HasTag(kVideoPrestreamTag) &&
+      !cc->Inputs().Tag(kVideoPrestreamTag).IsEmpty() &&
+      cc->Outputs().HasTag(kVideoPrestreamTag)) {
+    mediapipe::VideoHeader header =
+        cc->Inputs().Tag(kVideoPrestreamTag).Get<mediapipe::VideoHeader>();
+    // Update the header's width and height if needed.
+    ComputeOutputDimensions(header.width, header.height, &header.width,
+                            &header.height);
+    cc->Outputs()
+        .Tag(kVideoPrestreamTag)
+        .AddPacket(mediapipe::MakePacket<mediapipe::VideoHeader>(header).At(
+            mediapipe::Timestamp::PreStream()));
+  }
+
  // Override values if specified so.
  if (cc->Inputs().HasTag("ROTATION_DEGREES") &&
      !cc->Inputs().Tag("ROTATION_DEGREES").IsEmpty()) {
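
For context, wiring the new VIDEO_PRESTREAM pass-through in a graph would look roughly like the sketch below, so that downstream consumers of the video header see the rotated dimensions. This is an assumption-laden illustration, not part of this commit: the stream names are invented, and the rotation option value is only an example.

```cpp
// Minimal sketch: route the VideoHeader through ImageTransformationCalculator
// alongside the frames so its width/height reflect the configured rotation.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

CalculatorGraphConfig MakeRotationGraph() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "input_video"
    input_stream: "input_video_header"
    node {
      calculator: "ImageTransformationCalculator"
      input_stream: "IMAGE:input_video"
      input_stream: "VIDEO_PRESTREAM:input_video_header"
      output_stream: "IMAGE:rotated_video"
      output_stream: "VIDEO_PRESTREAM:rotated_video_header"
      options {
        [mediapipe.ImageTransformationCalculatorOptions.ext] {
          rotation_mode: ROTATION_90
        }
      }
    }
  )pb");
}

}  // namespace mediapipe
```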

View File

@@ -22,9 +22,9 @@
namespace mediapipe {

-// Takes in an encoded image std::string, decodes it by OpenCV, and converts to
-// an ImageFrame. Note that this calculator only supports grayscale and RGB
-// images for now.
+// Takes in an encoded image string, decodes it by OpenCV, and converts to an
+// ImageFrame. Note that this calculator only supports grayscale and RGB images
+// for now.
//
// Example config:
// node {

View File

@ -20,8 +20,8 @@
namespace mediapipe { namespace mediapipe {
// Takes in a std::string, draws the text std::string by cv::putText(), and // Takes in a string, draws the text string by cv::putText(), and outputs an
// outputs an ImageFrame. // ImageFrame.
// //
// Example config: // Example config:
// node { // node {

View File

@ -553,7 +553,6 @@ absl::Status ScaleImageCalculator::Process(CalculatorContext* cc) {
} }
} }
cc->GetCounter("Inputs")->Increment();
const ImageFrame* image_frame; const ImageFrame* image_frame;
ImageFrame converted_image_frame; ImageFrame converted_image_frame;
if (input_format_ == ImageFormat::YCBCR420P) { if (input_format_ == ImageFormat::YCBCR420P) {

View File

@ -183,22 +183,22 @@ absl::Status SegmentationSmoothingCalculator::Close(CalculatorContext* cc) {
absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) { absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
// Setup source images. // Setup source images.
const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>(); const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
const cv::Mat current_mat = mediapipe::formats::MatView(&current_frame); auto current_mat = mediapipe::formats::MatView(&current_frame);
RET_CHECK_EQ(current_mat.type(), CV_32FC1) RET_CHECK_EQ(current_mat->type(), CV_32FC1)
<< "Only 1-channel float input image is supported."; << "Only 1-channel float input image is supported.";
const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>(); const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
const cv::Mat previous_mat = mediapipe::formats::MatView(&previous_frame); auto previous_mat = mediapipe::formats::MatView(&previous_frame);
RET_CHECK_EQ(previous_mat.type(), current_mat.type()) RET_CHECK_EQ(previous_mat->type(), current_mat->type())
<< "Warning: mixing input format types: " << previous_mat.type() << "Warning: mixing input format types: " << previous_mat->type()
<< " != " << previous_mat.type(); << " != " << previous_mat->type();
RET_CHECK_EQ(current_mat.rows, previous_mat.rows); RET_CHECK_EQ(current_mat->rows, previous_mat->rows);
RET_CHECK_EQ(current_mat.cols, previous_mat.cols); RET_CHECK_EQ(current_mat->cols, previous_mat->cols);
// Setup destination image. // Setup destination image.
auto output_frame = std::make_shared<ImageFrame>( auto output_frame = std::make_shared<ImageFrame>(
current_frame.image_format(), current_mat.cols, current_mat.rows); current_frame.image_format(), current_mat->cols, current_mat->rows);
cv::Mat output_mat = mediapipe::formats::MatView(output_frame.get()); cv::Mat output_mat = mediapipe::formats::MatView(output_frame.get());
output_mat.setTo(cv::Scalar(0)); output_mat.setTo(cv::Scalar(0));
@ -233,8 +233,8 @@ absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
// Write directly to the first channel of output. // Write directly to the first channel of output.
for (int i = 0; i < output_mat.rows; ++i) { for (int i = 0; i < output_mat.rows; ++i) {
float* out_ptr = output_mat.ptr<float>(i); float* out_ptr = output_mat.ptr<float>(i);
const float* curr_ptr = current_mat.ptr<float>(i); const float* curr_ptr = current_mat->ptr<float>(i);
const float* prev_ptr = previous_mat.ptr<float>(i); const float* prev_ptr = previous_mat->ptr<float>(i);
for (int j = 0; j < output_mat.cols; ++j) { for (int j = 0; j < output_mat.cols; ++j) {
const float new_mask_value = curr_ptr[j]; const float new_mask_value = curr_ptr[j];
const float prev_mask_value = prev_ptr[j]; const float prev_mask_value = prev_ptr[j];

View File

@ -116,8 +116,8 @@ void RunGraph(Packet curr_packet, Packet prev_packet, bool use_gpu, float ratio,
ASSERT_EQ(1, output_packets.size()); ASSERT_EQ(1, output_packets.size());
Image result_image = output_packets[0].Get<Image>(); Image result_image = output_packets[0].Get<Image>();
cv::Mat result_mat = formats::MatView(&result_image); auto result_mat = formats::MatView(&result_image);
result_mat.copyTo(*result); result_mat->copyTo(*result);
// Fully close graph at end, otherwise calculator+Images are destroyed // Fully close graph at end, otherwise calculator+Images are destroyed
// after calling WaitUntilDone(). // after calling WaitUntilDone().
@ -135,10 +135,10 @@ void RunTest(bool use_gpu, float mix_ratio, cv::Mat& test_result) {
Packet curr_packet = MakePacket<Image>(std::make_unique<ImageFrame>( Packet curr_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
ImageFormat::VEC32F1, curr_mat.size().width, curr_mat.size().height)); ImageFormat::VEC32F1, curr_mat.size().width, curr_mat.size().height));
curr_mat.copyTo(formats::MatView(&(curr_packet.Get<Image>()))); curr_mat.copyTo(*formats::MatView(&(curr_packet.Get<Image>())));
Packet prev_packet = MakePacket<Image>(std::make_unique<ImageFrame>( Packet prev_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
ImageFormat::VEC32F1, prev_mat.size().width, prev_mat.size().height)); ImageFormat::VEC32F1, prev_mat.size().width, prev_mat.size().height));
prev_mat.copyTo(formats::MatView(&(prev_packet.Get<Image>()))); prev_mat.copyTo(*formats::MatView(&(prev_packet.Get<Image>())));
cv::Mat result; cv::Mat result;
RunGraph(curr_packet, prev_packet, use_gpu, mix_ratio, &result); RunGraph(curr_packet, prev_packet, use_gpu, mix_ratio, &result);

View File

@ -84,14 +84,15 @@ cc_library(
tags = ["nomac"], # config problem with cpuinfo via TF tags = ["nomac"], # config problem with cpuinfo via TF
deps = [ deps = [
"inference_calculator_interface", "inference_calculator_interface",
"//mediapipe/framework/deps:file_path",
"//mediapipe/gpu:gl_calculator_helper", "//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer", "//mediapipe/gpu:gpu_buffer",
"//mediapipe/util/tflite:config",
"//mediapipe/util/tflite:tflite_gpu_runner", "//mediapipe/util/tflite:tflite_gpu_runner",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate", "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape", "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
], ],
alwayslink = 1, alwayslink = 1,
) )
@ -154,7 +155,7 @@ cc_library(
cc_library( cc_library(
name = "inference_calculator_gl_if_compute_shader_available", name = "inference_calculator_gl_if_compute_shader_available",
deps = select({ deps = selects.with_or({
":compute_shader_unavailable": [], ":compute_shader_unavailable": [],
"//conditions:default": [":inference_calculator_gl"], "//conditions:default": [":inference_calculator_gl"],
}), }),
@ -303,7 +304,7 @@ cc_library(
"//mediapipe/framework/formats:tensor", "//mediapipe/framework/formats:tensor",
"//mediapipe/framework/formats/object_detection:anchor_cc_proto", "//mediapipe/framework/formats/object_detection:anchor_cc_proto",
"//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:ret_check",
] + select({ ] + selects.with_or({
":compute_shader_unavailable": [], ":compute_shader_unavailable": [],
"//conditions:default": [":tensors_to_detections_calculator_gpu_deps"], "//conditions:default": [":tensors_to_detections_calculator_gpu_deps"],
}), }),
@ -560,7 +561,7 @@ cc_library(
cc_library( cc_library(
name = "image_to_tensor_calculator_gpu_deps", name = "image_to_tensor_calculator_gpu_deps",
deps = select({ deps = selects.with_or({
"//mediapipe:android": [ "//mediapipe:android": [
":image_to_tensor_converter_gl_buffer", ":image_to_tensor_converter_gl_buffer",
"//mediapipe/gpu:gl_calculator_helper", "//mediapipe/gpu:gl_calculator_helper",
@ -684,7 +685,7 @@ cc_library(
name = "image_to_tensor_converter_gl_buffer", name = "image_to_tensor_converter_gl_buffer",
srcs = ["image_to_tensor_converter_gl_buffer.cc"], srcs = ["image_to_tensor_converter_gl_buffer.cc"],
hdrs = ["image_to_tensor_converter_gl_buffer.h"], hdrs = ["image_to_tensor_converter_gl_buffer.h"],
deps = ["//mediapipe/framework:port"] + select({ deps = ["//mediapipe/framework:port"] + selects.with_or({
"//mediapipe:apple": [], "//mediapipe:apple": [],
"//conditions:default": [ "//conditions:default": [
":image_to_tensor_converter", ":image_to_tensor_converter",

View File

@ -49,7 +49,6 @@
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h" #include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"
#include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gl_calculator_helper.h"
#endif // MEDIAPIPE_METAL_ENABLED #endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU #endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe { namespace mediapipe {
@ -142,11 +141,24 @@ class ImageToTensorCalculator : public Node {
const auto& options = const auto& options =
cc->Options<mediapipe::ImageToTensorCalculatorOptions>(); cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
RET_CHECK(options.has_output_tensor_float_range()) RET_CHECK(options.has_output_tensor_float_range() ||
options.has_output_tensor_int_range())
<< "Output tensor range is required."; << "Output tensor range is required.";
RET_CHECK_LT(options.output_tensor_float_range().min(), if (options.has_output_tensor_float_range()) {
options.output_tensor_float_range().max()) RET_CHECK_LT(options.output_tensor_float_range().min(),
<< "Valid output tensor range is required."; options.output_tensor_float_range().max())
<< "Valid output float tensor range is required.";
}
if (options.has_output_tensor_int_range()) {
RET_CHECK_LT(options.output_tensor_int_range().min(),
options.output_tensor_int_range().max())
<< "Valid output int tensor range is required.";
RET_CHECK_GE(options.output_tensor_int_range().min(), 0)
<< "The minimum of the output int tensor range must be non-negative.";
RET_CHECK_LE(options.output_tensor_int_range().max(), 255)
<< "The maximum of the output int tensor range must be less than or "
"equal to 255.";
}
RET_CHECK_GT(options.output_tensor_width(), 0) RET_CHECK_GT(options.output_tensor_width(), 0)
<< "Valid output tensor width is required."; << "Valid output tensor width is required.";
RET_CHECK_GT(options.output_tensor_height(), 0) RET_CHECK_GT(options.output_tensor_height(), 0)
@ -175,9 +187,15 @@ class ImageToTensorCalculator : public Node {
options_ = cc->Options<mediapipe::ImageToTensorCalculatorOptions>(); options_ = cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
output_width_ = options_.output_tensor_width(); output_width_ = options_.output_tensor_width();
output_height_ = options_.output_tensor_height(); output_height_ = options_.output_tensor_height();
range_min_ = options_.output_tensor_float_range().min(); is_int_output_ = options_.has_output_tensor_int_range();
range_max_ = options_.output_tensor_float_range().max(); range_min_ =
is_int_output_
? static_cast<float>(options_.output_tensor_int_range().min())
: options_.output_tensor_float_range().min();
range_max_ =
is_int_output_
? static_cast<float>(options_.output_tensor_int_range().max())
: options_.output_tensor_float_range().max();
return absl::OkStatus(); return absl::OkStatus();
} }
@ -225,7 +243,7 @@ class ImageToTensorCalculator : public Node {
} }
// Lazy initialization of the GPU or CPU converter. // Lazy initialization of the GPU or CPU converter.
MP_RETURN_IF_ERROR(InitConverterIfNecessary(cc, image->UsesGpu())); MP_RETURN_IF_ERROR(InitConverterIfNecessary(cc, *image.get()));
ASSIGN_OR_RETURN(Tensor tensor, ASSIGN_OR_RETURN(Tensor tensor,
(image->UsesGpu() ? gpu_converter_ : cpu_converter_) (image->UsesGpu() ? gpu_converter_ : cpu_converter_)
@ -283,9 +301,15 @@ class ImageToTensorCalculator : public Node {
} }
} }
absl::Status InitConverterIfNecessary(CalculatorContext* cc, bool use_gpu) { absl::Status InitConverterIfNecessary(CalculatorContext* cc,
const Image& image) {
// Lazy initialization of the GPU or CPU converter. // Lazy initialization of the GPU or CPU converter.
if (use_gpu) { if (image.UsesGpu()) {
if (is_int_output_) {
return absl::UnimplementedError(
"ImageToTensorConverter for the input GPU image currently doesn't "
"support quantization.");
}
if (!gpu_converter_) { if (!gpu_converter_) {
#if !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED #if MEDIAPIPE_METAL_ENABLED
@ -296,9 +320,17 @@ class ImageToTensorCalculator : public Node {
CreateImageToGlBufferTensorConverter( CreateImageToGlBufferTensorConverter(
cc, DoesGpuInputStartAtBottom(), GetBorderMode())); cc, DoesGpuInputStartAtBottom(), GetBorderMode()));
#else #else
ASSIGN_OR_RETURN(gpu_converter_, // Check whether the underlying storage object is a GL texture.
CreateImageToGlTextureTensorConverter( if (image.GetGpuBuffer()
cc, DoesGpuInputStartAtBottom(), GetBorderMode())); .internal_storage<mediapipe::GlTextureBuffer>()) {
ASSIGN_OR_RETURN(
gpu_converter_,
CreateImageToGlTextureTensorConverter(
cc, DoesGpuInputStartAtBottom(), GetBorderMode()));
} else {
return absl::UnimplementedError(
"ImageToTensorConverter for the input GPU image is unavailable.");
}
#endif // MEDIAPIPE_METAL_ENABLED #endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU #endif // !MEDIAPIPE_DISABLE_GPU
} }
@ -306,7 +338,10 @@ class ImageToTensorCalculator : public Node {
if (!cpu_converter_) { if (!cpu_converter_) {
#if !MEDIAPIPE_DISABLE_OPENCV #if !MEDIAPIPE_DISABLE_OPENCV
ASSIGN_OR_RETURN(cpu_converter_, ASSIGN_OR_RETURN(cpu_converter_,
CreateOpenCvConverter(cc, GetBorderMode())); CreateOpenCvConverter(
cc, GetBorderMode(),
is_int_output_ ? Tensor::ElementType::kUInt8
: Tensor::ElementType::kFloat32));
#else #else
LOG(FATAL) << "Cannot create image to tensor opencv converter since " LOG(FATAL) << "Cannot create image to tensor opencv converter since "
"MEDIAPIPE_DISABLE_OPENCV is defined."; "MEDIAPIPE_DISABLE_OPENCV is defined.";
@ -321,6 +356,7 @@ class ImageToTensorCalculator : public Node {
mediapipe::ImageToTensorCalculatorOptions options_; mediapipe::ImageToTensorCalculatorOptions options_;
int output_width_ = 0; int output_width_ = 0;
int output_height_ = 0; int output_height_ = 0;
bool is_int_output_ = false;
float range_min_ = 0.0f; float range_min_ = 0.0f;
float range_max_ = 1.0f; float range_max_ = 1.0f;
}; };
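A hedged configuration sketch (tensor size and stream names are placeholders) that requests quantized uint8 output; per the checks above, this path is CPU-only and the range must stay within [0, 255]:

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Sketch: requests an unsigned 8-bit output tensor covering the full [0, 255]
// range. Feeding a GPU image with these options would hit the
// UnimplementedError above.
mediapipe::CalculatorGraphConfig MakeUint8ImageToTensorConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "input_image"
    node {
      calculator: "ImageToTensorCalculator"
      input_stream: "IMAGE:input_image"
      output_stream: "TENSORS:tensors"
      options {
        [mediapipe.ImageToTensorCalculatorOptions.ext] {
          output_tensor_width: 224
          output_tensor_height: 224
          keep_aspect_ratio: true
          output_tensor_int_range { min: 0 max: 255 }
        }
      }
    }
  )pb");
}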

View File

@ -31,6 +31,14 @@ message ImageToTensorCalculatorOptions {
optional float max = 2; optional float max = 2;
} }
// Range of int values [min, max].
// min must be strictly less than max.
// Please note that IntRange is supported for CPU tensors only.
message IntRange {
optional int64 min = 1;
optional int64 max = 2;
}
// Pixel extrapolation methods. See @border_mode. // Pixel extrapolation methods. See @border_mode.
enum BorderMode { enum BorderMode {
BORDER_UNSPECIFIED = 0; BORDER_UNSPECIFIED = 0;
@ -49,6 +57,7 @@ message ImageToTensorCalculatorOptions {
// Output tensor element range/type image pixels are converted to. // Output tensor element range/type image pixels are converted to.
oneof range { oneof range {
FloatRange output_tensor_float_range = 4; FloatRange output_tensor_float_range = 4;
IntRange output_tensor_int_range = 7;
} }
// For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs

View File

@ -61,7 +61,8 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
float range_max, int tensor_width, float range_max, int tensor_width,
int tensor_height, bool keep_aspect, int tensor_height, bool keep_aspect,
absl::optional<BorderMode> border_mode, absl::optional<BorderMode> border_mode,
const mediapipe::NormalizedRect& roi) { const mediapipe::NormalizedRect& roi,
bool output_int_tensor) {
std::string border_mode_str; std::string border_mode_str;
if (border_mode) { if (border_mode) {
switch (*border_mode) { switch (*border_mode) {
@ -73,6 +74,21 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
break; break;
} }
} }
std::string output_tensor_range;
if (output_int_tensor) {
output_tensor_range = absl::Substitute(R"(output_tensor_int_range {
min: $0
max: $1
})",
static_cast<int>(range_min),
static_cast<int>(range_max));
} else {
output_tensor_range = absl::Substitute(R"(output_tensor_float_range {
min: $0
max: $1
})",
range_min, range_max);
}
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(R"( absl::Substitute(R"(
input_stream: "input_image" input_stream: "input_image"
@ -86,22 +102,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
[mediapipe.ImageToTensorCalculatorOptions.ext] { [mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: $0 output_tensor_width: $0
output_tensor_height: $1 output_tensor_height: $1
keep_aspect_ratio: $4 keep_aspect_ratio: $2
output_tensor_float_range { $3 # output range
min: $2 $4 # border mode
max: $3
}
$5 # border mode
} }
} }
} }
)", )",
/*$0=*/tensor_width, /*$0=*/tensor_width,
/*$1=*/tensor_height, /*$1=*/tensor_height,
/*$2=*/range_min, /*$2=*/keep_aspect ? "true" : "false",
/*$3=*/range_max, /*$3=*/output_tensor_range,
/*$4=*/keep_aspect ? "true" : "false", /*$4=*/border_mode_str));
/*$5=*/border_mode_str));
std::vector<Packet> output_packets; std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets); tool::AddVectorSink("tensor", &graph_config, &output_packets);
@ -126,11 +138,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
ASSERT_THAT(tensor_vec, testing::SizeIs(1)); ASSERT_THAT(tensor_vec, testing::SizeIs(1));
const Tensor& tensor = tensor_vec[0]; const Tensor& tensor = tensor_vec[0];
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
auto view = tensor.GetCpuReadView(); auto view = tensor.GetCpuReadView();
cv::Mat tensor_mat(tensor_height, tensor_width, CV_32FC3, cv::Mat tensor_mat;
const_cast<float*>(view.buffer<float>())); if (output_int_tensor) {
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kUInt8);
tensor_mat = cv::Mat(tensor_height, tensor_width, CV_8UC3,
const_cast<uint8*>(view.buffer<uint8>()));
} else {
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
tensor_mat = cv::Mat(tensor_height, tensor_width, CV_32FC3,
const_cast<float*>(view.buffer<float>()));
}
cv::Mat result_rgb; cv::Mat result_rgb;
auto transformation = auto transformation =
GetValueRangeTransformation(range_min, range_max, 0.0f, 255.0f).value(); GetValueRangeTransformation(range_min, range_max, 0.0f, 255.0f).value();
@ -170,16 +189,26 @@ enum class InputType { kImageFrame, kImage };
const std::vector<InputType> kInputTypesToTest = {InputType::kImageFrame, const std::vector<InputType> kInputTypesToTest = {InputType::kImageFrame,
InputType::kImage}; InputType::kImage};
void RunTest(cv::Mat input, cv::Mat expected_result, float range_min, void RunTest(cv::Mat input, cv::Mat expected_result,
float range_max, int tensor_width, int tensor_height, std::vector<float> float_range, std::vector<int> int_range,
bool keep_aspect, absl::optional<BorderMode> border_mode, int tensor_width, int tensor_height, bool keep_aspect,
absl::optional<BorderMode> border_mode,
const mediapipe::NormalizedRect& roi) { const mediapipe::NormalizedRect& roi) {
ASSERT_EQ(2, float_range.size());
ASSERT_EQ(2, int_range.size());
for (auto input_type : kInputTypesToTest) { for (auto input_type : kInputTypesToTest) {
RunTestWithInputImagePacket( RunTestWithInputImagePacket(
input_type == InputType::kImageFrame ? MakeImageFramePacket(input) input_type == InputType::kImageFrame ? MakeImageFramePacket(input)
: MakeImagePacket(input), : MakeImagePacket(input),
expected_result, range_min, range_max, tensor_width, tensor_height, expected_result, float_range[0], float_range[1], tensor_width,
keep_aspect, border_mode, roi); tensor_height, keep_aspect, border_mode, roi,
/*output_int_tensor=*/false);
RunTestWithInputImagePacket(
input_type == InputType::kImageFrame ? MakeImageFramePacket(input)
: MakeImagePacket(input),
expected_result, int_range[0], int_range[1], tensor_width,
tensor_height, keep_aspect, border_mode, roi,
/*output_int_tensor=*/true);
} }
} }
@ -195,8 +224,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspect) {
"tensor/testdata/image_to_tensor/input.jpg"), "tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"), "tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true, /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
/*border mode*/ {}, roi); /*border mode*/ {}, roi);
} }
@ -213,8 +242,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectBorderZero) {
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_border_zero.png"), "medium_sub_rect_keep_aspect_border_zero.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true, /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
BorderMode::kZero, roi); BorderMode::kZero, roi);
} }
@ -231,7 +260,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectWithRotation) {
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation.png"), "medium_sub_rect_keep_aspect_with_rotation.png"),
/*range_min=*/0.0f, /*range_max=*/1.0f, /*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true, /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
BorderMode::kReplicate, roi); BorderMode::kReplicate, roi);
} }
@ -249,7 +279,8 @@ TEST(ImageToTensorCalculatorTest,
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png"), "medium_sub_rect_keep_aspect_with_rotation_border_zero.png"),
/*range_min=*/0.0f, /*range_max=*/1.0f, /*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true, /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
BorderMode::kZero, roi); BorderMode::kZero, roi);
} }
@ -267,8 +298,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotation) {
GetRgb( GetRgb(
"/mediapipe/calculators/" "/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"), "tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"),
/*range_min=*/-1.0f, /*float_range=*/{-1.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false, /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false,
BorderMode::kReplicate, roi); BorderMode::kReplicate, roi);
} }
@ -285,8 +316,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotationBorderZero) {
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"medium_sub_rect_with_rotation_border_zero.png"), "medium_sub_rect_with_rotation_border_zero.png"),
/*range_min=*/-1.0f, /*float_range=*/{-1.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false, /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false,
BorderMode::kZero, roi); BorderMode::kZero, roi);
} }
@ -302,8 +333,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRect) {
"tensor/testdata/image_to_tensor/input.jpg"), "tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect.png"), "tensor/testdata/image_to_tensor/large_sub_rect.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false, /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
BorderMode::kReplicate, roi); BorderMode::kReplicate, roi);
} }
@ -320,8 +351,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectBorderZero) {
"tensor/testdata/image_to_tensor/input.jpg"), "tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png"), "tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false, /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
BorderMode::kZero, roi); BorderMode::kZero, roi);
} }
@ -338,8 +369,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspect) {
"tensor/testdata/image_to_tensor/input.jpg"), "tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"), "tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true, /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kReplicate, roi); BorderMode::kReplicate, roi);
} }
@ -356,8 +387,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectBorderZero) {
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_border_zero.png"), "large_sub_rect_keep_aspect_border_zero.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true, /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kZero, roi); BorderMode::kZero, roi);
} }
@ -374,8 +405,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectWithRotation) {
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation.png"), "large_sub_rect_keep_aspect_with_rotation.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true, /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
/*border_mode=*/{}, roi); /*border_mode=*/{}, roi);
} }
@ -393,8 +424,8 @@ TEST(ImageToTensorCalculatorTest,
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/" "tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation_border_zero.png"), "large_sub_rect_keep_aspect_with_rotation_border_zero.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true, /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
/*border_mode=*/BorderMode::kZero, roi); /*border_mode=*/BorderMode::kZero, roi);
} }
@ -410,8 +441,8 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRange) {
"tensor/testdata/image_to_tensor/input.jpg"), "tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png"), "tensor/testdata/image_to_tensor/noop_except_range.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true, /*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kReplicate, roi); BorderMode::kReplicate, roi);
} }
@ -427,8 +458,8 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRangeBorderZero) {
"tensor/testdata/image_to_tensor/input.jpg"), "tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/" GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png"), "tensor/testdata/image_to_tensor/noop_except_range.png"),
/*range_min=*/0.0f, /*float_range=*/{0.0f, 1.0f},
/*range_max=*/1.0f, /*int_range=*/{0, 255},
/*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true, /*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kZero, roi); BorderMode::kZero, roi);
} }

View File

@ -16,7 +16,7 @@
#include "mediapipe/framework/port.h" #include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <array> #include <array>
#include <memory> #include <memory>
@ -339,4 +339,4 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
} // namespace mediapipe } // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

View File

@ -17,7 +17,7 @@
#include "mediapipe/framework/port.h" #include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <memory> #include <memory>
@ -37,6 +37,6 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
} // namespace mediapipe } // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_ #endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_

View File

@ -2,7 +2,7 @@
#include "mediapipe/framework/port.h" #include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <array> #include <array>
#include <memory> #include <memory>
@ -85,4 +85,4 @@ bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
} // namespace mediapipe } // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

View File

@ -3,7 +3,7 @@
#include "mediapipe/framework/port.h" #include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <array> #include <array>
#include <memory> #include <memory>
@ -40,6 +40,6 @@ bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context);
} // namespace mediapipe } // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_UTILS_H_ #endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_UTILS_H_

View File

@ -1,6 +1,6 @@
#include "mediapipe/framework/port.h" #include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_utils.h" #include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_utils.h"
#include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/gtest.h"
@ -46,4 +46,4 @@ TEST(ImageToTensorConverterGlUtilsTest, GlTexParameteriOverrider) {
} // namespace } // namespace
} // namespace mediapipe } // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

View File

@ -35,7 +35,8 @@ namespace {
class OpenCvProcessor : public ImageToTensorConverter { class OpenCvProcessor : public ImageToTensorConverter {
public: public:
OpenCvProcessor(BorderMode border_mode) { OpenCvProcessor(BorderMode border_mode, Tensor::ElementType tensor_type)
: tensor_type_(tensor_type) {
switch (border_mode) { switch (border_mode) {
case BorderMode::kReplicate: case BorderMode::kReplicate:
border_mode_ = cv::BORDER_REPLICATE; border_mode_ = cv::BORDER_REPLICATE;
@ -44,6 +45,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
border_mode_ = cv::BORDER_CONSTANT; border_mode_ = cv::BORDER_CONSTANT;
break; break;
} }
mat_type_ = tensor_type == Tensor::ElementType::kUInt8 ? CV_8UC3 : CV_32FC3;
} }
absl::StatusOr<Tensor> Convert(const mediapipe::Image& input, absl::StatusOr<Tensor> Convert(const mediapipe::Image& input,
@ -56,15 +58,20 @@ class OpenCvProcessor : public ImageToTensorConverter {
absl::StrCat("Only RGBA/RGB formats are supported, passed format: ", absl::StrCat("Only RGBA/RGB formats are supported, passed format: ",
static_cast<uint32_t>(input.image_format()))); static_cast<uint32_t>(input.image_format())));
} }
cv::Mat src = mediapipe::formats::MatView(&input); auto src = mediapipe::formats::MatView(&input);
constexpr int kNumChannels = 3; constexpr int kNumChannels = 3;
Tensor tensor( Tensor tensor(tensor_type_, Tensor::Shape{1, output_dims.height,
Tensor::ElementType::kFloat32, output_dims.width, kNumChannels});
Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});
auto buffer_view = tensor.GetCpuWriteView(); auto buffer_view = tensor.GetCpuWriteView();
cv::Mat dst(output_dims.height, output_dims.width, CV_32FC3, cv::Mat dst;
buffer_view.buffer<float>()); if (tensor_type_ == Tensor::ElementType::kUInt8) {
dst = cv::Mat(output_dims.height, output_dims.width, mat_type_,
buffer_view.buffer<uint8>());
} else {
dst = cv::Mat(output_dims.height, output_dims.width, mat_type_,
buffer_view.buffer<float>());
}
const cv::RotatedRect rotated_rect(cv::Point2f(roi.center_x, roi.center_y), const cv::RotatedRect rotated_rect(cv::Point2f(roi.center_x, roi.center_y),
cv::Size2f(roi.width, roi.height), cv::Size2f(roi.width, roi.height),
@ -85,7 +92,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
cv::Mat projection_matrix = cv::Mat projection_matrix =
cv::getPerspectiveTransform(src_points, dst_points); cv::getPerspectiveTransform(src_points, dst_points);
cv::Mat transformed; cv::Mat transformed;
cv::warpPerspective(src, transformed, projection_matrix, cv::warpPerspective(*src, transformed, projection_matrix,
cv::Size(dst_width, dst_height), cv::Size(dst_width, dst_height),
/*flags=*/cv::INTER_LINEAR, /*flags=*/cv::INTER_LINEAR,
/*borderMode=*/border_mode_); /*borderMode=*/border_mode_);
@ -102,19 +109,22 @@ class OpenCvProcessor : public ImageToTensorConverter {
auto transform, auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax, GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max)); range_min, range_max));
transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset); transformed.convertTo(dst, mat_type_, transform.scale, transform.offset);
return tensor; return tensor;
} }
private: private:
enum cv::BorderTypes border_mode_; enum cv::BorderTypes border_mode_;
Tensor::ElementType tensor_type_;
int mat_type_;
}; };
} // namespace } // namespace
absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter( absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
CalculatorContext* cc, BorderMode border_mode) { CalculatorContext* cc, BorderMode border_mode,
return absl::make_unique<OpenCvProcessor>(border_mode); Tensor::ElementType tensor_type) {
return absl::make_unique<OpenCvProcessor>(border_mode, tensor_type);
} }
} // namespace mediapipe } // namespace mediapipe
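The convertTo call above relies on GetValueRangeTransformation to produce a scale/offset pair; assuming it is the usual linear remapping, it would look roughly like this:

// Sketch only, not the library implementation: a linear map from
// [from_min, from_max] to [to_min, to_max]. The resulting scale/offset pair is
// what cv::Mat::convertTo() consumes above.
struct ValueTransformation {
  float scale;
  float offset;
};

ValueTransformation LinearValueTransformation(float from_min, float from_max,
                                              float to_min, float to_max) {
  const float scale = (to_max - to_min) / (from_max - from_min);
  const float offset = to_min - from_min * scale;
  return {scale, offset};
}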

View File

@ -25,7 +25,8 @@ namespace mediapipe {
// Creates OpenCV image-to-tensor converter. // Creates OpenCV image-to-tensor converter.
absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter( absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
CalculatorContext* cc, BorderMode border_mode); CalculatorContext* cc, BorderMode border_mode,
Tensor::ElementType tensor_type);
} // namespace mediapipe } // namespace mediapipe

View File

@ -36,6 +36,7 @@ class InferenceCalculatorSelectorImpl
Subgraph::GetOptions<mediapipe::InferenceCalculatorOptions>( Subgraph::GetOptions<mediapipe::InferenceCalculatorOptions>(
subgraph_node); subgraph_node);
std::vector<absl::string_view> impls; std::vector<absl::string_view> impls;
const bool should_use_gpu = const bool should_use_gpu =
!options.has_delegate() || // Use GPU delegate if not specified !options.has_delegate() || // Use GPU delegate if not specified
(options.has_delegate() && options.delegate().has_gpu()); (options.has_delegate() && options.delegate().has_gpu());

View File

@ -81,6 +81,7 @@ class InferenceCalculatorCpuImpl
Packet<TfLiteModelPtr> model_packet_; Packet<TfLiteModelPtr> model_packet_;
std::unique_ptr<tflite::Interpreter> interpreter_; std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteDelegatePtr delegate_; TfLiteDelegatePtr delegate_;
bool has_quantized_input_;
}; };
absl::Status InferenceCalculatorCpuImpl::UpdateContract( absl::Status InferenceCalculatorCpuImpl::UpdateContract(
@ -109,10 +110,18 @@ absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) {
for (int i = 0; i < input_tensors.size(); ++i) { for (int i = 0; i < input_tensors.size(); ++i) {
const Tensor* input_tensor = &input_tensors[i]; const Tensor* input_tensor = &input_tensors[i];
auto input_tensor_view = input_tensor->GetCpuReadView(); auto input_tensor_view = input_tensor->GetCpuReadView();
auto input_tensor_buffer = input_tensor_view.buffer<float>(); if (has_quantized_input_) {
float* local_tensor_buffer = interpreter_->typed_input_tensor<float>(i); // TODO: Support more quantized tensor types.
std::memcpy(local_tensor_buffer, input_tensor_buffer, auto input_tensor_buffer = input_tensor_view.buffer<uint8>();
input_tensor->bytes()); uint8* local_tensor_buffer = interpreter_->typed_input_tensor<uint8>(i);
std::memcpy(local_tensor_buffer, input_tensor_buffer,
input_tensor->bytes());
} else {
auto input_tensor_buffer = input_tensor_view.buffer<float>();
float* local_tensor_buffer = interpreter_->typed_input_tensor<float>(i);
std::memcpy(local_tensor_buffer, input_tensor_buffer,
input_tensor->bytes());
}
} }
// Run inference. // Run inference.
@ -167,10 +176,9 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegateAndAllocateTensors(
// AllocateTensors() can be called only after ModifyGraphWithDelegate. // AllocateTensors() can be called only after ModifyGraphWithDelegate.
RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk); RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// TODO: Support quantized tensors. has_quantized_input_ =
RET_CHECK_NE( interpreter_->tensor(interpreter_->inputs()[0])->quantization.type ==
interpreter_->tensor(interpreter_->inputs()[0])->quantization.type, kTfLiteAffineQuantization;
kTfLiteAffineQuantization);
return absl::OkStatus(); return absl::OkStatus();
} }
@ -226,7 +234,7 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
#endif // defined(__EMSCRIPTEN__) #endif // defined(__EMSCRIPTEN__)
if (use_xnnpack) { if (use_xnnpack) {
TfLiteXNNPackDelegateOptions xnnpack_opts{}; auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
xnnpack_opts.num_threads = xnnpack_opts.num_threads =
GetXnnpackNumThreads(opts_has_delegate, opts_delegate); GetXnnpackNumThreads(opts_has_delegate, opts_delegate);
delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts), delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
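A self-contained sketch of the XNNPACK path introduced above, assuming only the public TFLite delegate API; the caller owns the returned delegate and must keep it alive while the interpreter uses it:

#include <memory>

#include "absl/status/statusor.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#include "tensorflow/lite/interpreter.h"

using TfLiteDelegatePtr =
    std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;

// Sketch: start from the delegate's documented defaults and override only the
// thread count, rather than zero-initializing the options struct.
absl::StatusOr<TfLiteDelegatePtr> ApplyXnnpackDelegate(
    tflite::Interpreter* interpreter, int num_threads) {
  auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
  xnnpack_opts.num_threads = num_threads;
  TfLiteDelegatePtr delegate(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
                             &TfLiteXNNPackDelegateDelete);
  if (interpreter->ModifyGraphWithDelegate(delegate.get()) != kTfLiteOk) {
    return absl::InternalError("Failed to apply the XNNPACK delegate.");
  }
  return delegate;
}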

View File

@ -154,8 +154,9 @@ TEST_P(InferenceCalculatorTest, TestFaceDetection) {
detection_packets[0].Get<std::vector<Detection>>(); detection_packets[0].Get<std::vector<Detection>>();
#if !defined(MEDIAPIPE_PROTO_LITE) #if !defined(MEDIAPIPE_PROTO_LITE)
// Approximately is not available with lite protos (b/178137094). // Approximately is not available with lite protos (b/178137094).
EXPECT_THAT(dets, constexpr float kEpsilon = 0.001;
ElementsAre(Approximately(EqualsProto(expected_detection)))); EXPECT_THAT(dets, ElementsAre(Approximately(EqualsProto(expected_detection),
kEpsilon)));
#endif #endif
} }

View File

@ -59,8 +59,6 @@ class InferenceCalculatorGlImpl
// TfLite requires us to keep the model alive as long as the interpreter is. // TfLite requires us to keep the model alive as long as the interpreter is.
Packet<TfLiteModelPtr> model_packet_; Packet<TfLiteModelPtr> model_packet_;
std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteDelegatePtr delegate_;
#if MEDIAPIPE_TFLITE_GL_INFERENCE #if MEDIAPIPE_TFLITE_GL_INFERENCE
mediapipe::GlCalculatorHelper gpu_helper_; mediapipe::GlCalculatorHelper gpu_helper_;
@ -72,6 +70,9 @@ class InferenceCalculatorGlImpl
tflite_gpu_runner_usage_; tflite_gpu_runner_usage_;
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE #endif // MEDIAPIPE_TFLITE_GL_INFERENCE
TfLiteDelegatePtr delegate_;
std::unique_ptr<tflite::Interpreter> interpreter_;
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED #if MEDIAPIPE_TFLITE_GPU_SUPPORTED
std::vector<Tensor::Shape> output_shapes_; std::vector<Tensor::Shape> output_shapes_;
std::vector<std::unique_ptr<Tensor>> gpu_buffers_in_; std::vector<std::unique_ptr<Tensor>> gpu_buffers_in_;
@ -252,12 +253,17 @@ absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
gpu_buffers_in_.clear(); gpu_buffers_in_.clear();
gpu_buffers_out_.clear(); gpu_buffers_out_.clear();
// Delegate must outlive the interpreter, hence the order is important.
interpreter_ = nullptr;
delegate_ = nullptr;
return absl::OkStatus(); return absl::OkStatus();
})); }));
} else {
// Delegate must outlive the interpreter, hence the order is important.
interpreter_ = nullptr;
delegate_ = nullptr;
} }
interpreter_ = nullptr;
delegate_ = nullptr;
return absl::OkStatus(); return absl::OkStatus();
} }
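The reordered member declarations above rely on C++ destroying members in reverse declaration order; a minimal illustration (simplified, stand-in types):

#include <memory>

// Illustration only: the interpreter below is destroyed before the delegate it
// still references, matching the explicit reset order in Close().
struct FakeDelegate {};
struct FakeInterpreter {};

class InferenceState {
  std::unique_ptr<FakeDelegate> delegate_;        // declared first, destroyed last
  std::unique_ptr<FakeInterpreter> interpreter_;  // declared last, destroyed first
};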

View File

@ -266,6 +266,7 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU(
auto raw_box_tensor = &input_tensors[0]; auto raw_box_tensor = &input_tensors[0];
RET_CHECK_EQ(raw_box_tensor->shape().dims.size(), 3); RET_CHECK_EQ(raw_box_tensor->shape().dims.size(), 3);
RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1); RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1);
RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
RET_CHECK_EQ(raw_box_tensor->shape().dims[1], num_boxes_); RET_CHECK_EQ(raw_box_tensor->shape().dims[1], num_boxes_);
RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_coords_); RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_coords_);
auto raw_score_tensor = &input_tensors[1]; auto raw_score_tensor = &input_tensors[1];
@ -385,6 +386,7 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
CalculatorContext* cc, std::vector<Detection>* output_detections) { CalculatorContext* cc, std::vector<Detection>* output_detections) {
const auto& input_tensors = *kInTensors(cc); const auto& input_tensors = *kInTensors(cc);
RET_CHECK_GE(input_tensors.size(), 2); RET_CHECK_GE(input_tensors.size(), 2);
RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
#ifndef MEDIAPIPE_DISABLE_GL_COMPUTE #ifndef MEDIAPIPE_DISABLE_GL_COMPUTE
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &input_tensors, &cc, MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &input_tensors, &cc,
@ -563,7 +565,6 @@ absl::Status TensorsToDetectionsCalculator::LoadOptions(CalculatorContext* cc) {
// Get calculator options specified in the graph. // Get calculator options specified in the graph.
options_ = cc->Options<::mediapipe::TensorsToDetectionsCalculatorOptions>(); options_ = cc->Options<::mediapipe::TensorsToDetectionsCalculatorOptions>();
RET_CHECK(options_.has_num_classes()); RET_CHECK(options_.has_num_classes());
RET_CHECK(options_.has_num_boxes());
RET_CHECK(options_.has_num_coords()); RET_CHECK(options_.has_num_coords());
num_classes_ = options_.num_classes(); num_classes_ = options_.num_classes();

View File

@ -355,9 +355,10 @@ absl::Status TensorsToSegmentationCalculator::ProcessCpu(
std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>( std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
ImageFormat::VEC32F1, output_width, output_height); ImageFormat::VEC32F1, output_width, output_height);
std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame); std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame);
cv::Mat output_mat = formats::MatView(output_mask.get()); auto output_mat = formats::MatView(output_mask.get());
// Upsample small mask into output. // Upsample small mask into output.
cv::resize(small_mask_mat, output_mat, cv::Size(output_width, output_height)); cv::resize(small_mask_mat, *output_mat,
cv::Size(output_width, output_height));
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp()); cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());
return absl::OkStatus(); return absl::OkStatus();

View File

@ -334,6 +334,7 @@ cc_library(
":image_frame_to_tensor_calculator_cc_proto", ":image_frame_to_tensor_calculator_cc_proto",
"//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame", "//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:core_proto",
"//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status", "//mediapipe/framework/port:status",
] + select({ ] + select({

View File

@ -17,6 +17,7 @@
#include "mediapipe/calculators/tensorflow/image_frame_to_tensor_calculator.pb.h" #include "mediapipe/calculators/tensorflow/image_frame_to_tensor_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h" #include "mediapipe/framework/port/status_macros.h"
@ -32,7 +33,10 @@ namespace {
// Convert the ImageFrame into Tensor with floating point value type. // Convert the ImageFrame into Tensor with floating point value type.
// The value will be normalized based on mean and stddev. // The value will be normalized based on mean and stddev.
std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor( std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor(
const ImageFrame& image_frame, float mean, float stddev) { // const ImageFrame& image_frame, float mean, float stddev) {
const ImageFrame& image_frame,
const mediapipe::proto_ns::RepeatedField<float>& mean,
const mediapipe::proto_ns::RepeatedField<float>& stddev) {
const int cols = image_frame.Width(); const int cols = image_frame.Width();
const int rows = image_frame.Height(); const int rows = image_frame.Height();
const int channels = image_frame.NumberOfChannels(); const int channels = image_frame.NumberOfChannels();
@ -45,7 +49,20 @@ std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor(
for (int row = 0; row < rows; ++row) { for (int row = 0; row < rows; ++row) {
for (int col = 0; col < cols; ++col) { for (int col = 0; col < cols; ++col) {
for (int channel = 0; channel < channels; ++channel) { for (int channel = 0; channel < channels; ++channel) {
tensor_data(row, col, channel) = (pixel[channel] - mean) / stddev; float mean_value = 0;
if (mean.size() > 1) {
mean_value = mean[channel];
} else if (!mean.empty()) {
mean_value = mean[0];
}
float stddev_value = 1;
if (stddev.size() > 1) {
stddev_value = stddev[channel];
} else if (!stddev.empty()) {
stddev_value = stddev[0];
}
tensor_data(row, col, channel) =
(pixel[channel] - mean_value) / stddev_value;
} }
pixel += channels; pixel += channels;
} }
@ -126,7 +143,18 @@ absl::Status ImageFrameToTensorCalculator::Process(CalculatorContext* cc) {
const tf::DataType data_type = options_.data_type(); const tf::DataType data_type = options_.data_type();
RET_CHECK_EQ(data_type, tf::DT_FLOAT) RET_CHECK_EQ(data_type, tf::DT_FLOAT)
<< "Unsupported data type " << data_type; << "Unsupported data type " << data_type;
RET_CHECK_GT(options_.stddev(), 0.0f); RET_CHECK_GT(options_.stddev().size(), 0) << "You must set a stddev.";
RET_CHECK_GT(options_.stddev()[0], 0.0f) << "The stddev cannot be zero.";
if (options_.stddev().size() > 1) {
RET_CHECK_EQ(options_.stddev().size(), video_frame.NumberOfChannels())
<< "If specifying multiple stddev normalization values, "
<< "the number must match the number of image channels.";
}
if (options_.mean().size() > 1) {
RET_CHECK_EQ(options_.mean().size(), video_frame.NumberOfChannels())
<< "If specifying multiple mean normalization values, "
<< "the number must match the number of image channels.";
}
tensor = ImageFrameToNormalizedTensor(video_frame, options_.mean(), tensor = ImageFrameToNormalizedTensor(video_frame, options_.mean(),
options_.stddev()); options_.stddev());
} else { } else {
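The repeated mean/stddev handling above follows a simple broadcast rule; a hypothetical helper (not part of the calculator) that captures it:

#include "mediapipe/framework/port/proto_ns.h"

// Hypothetical helper: a single value is applied to every channel, multiple
// values are indexed per channel, and the default (0 for mean, 1 for stddev)
// is used when the field is empty.
static float ValueForChannel(
    const mediapipe::proto_ns::RepeatedField<float>& values, int channel,
    float default_value) {
  if (values.size() > 1) return values[channel];
  if (!values.empty()) return values[0];
  return default_value;
}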

View File

@ -32,6 +32,6 @@ message ImageFrameToTensorCalculatorOptions {
// If set, the output tensor T is equal to (F - mean * J) / stddev, where F // If set, the output tensor T is equal to (F - mean * J) / stddev, where F
// and J are the input image frame and the all-ones matrix of the same size, // and J are the input image frame and the all-ones matrix of the same size,
// respectively. Otherwise, T is equal to F. // respectively. Otherwise, T is equal to F.
optional float mean = 2; repeated float mean = 2;
optional float stddev = 3; repeated float stddev = 3;
} }

View File

@ -454,4 +454,32 @@ TEST_F(ImageFrameToTensorCalculatorTest, FixedRGBFrameWithMeanAndStddev) {
EXPECT_EQ(actual[2], 127.0f / 128.0f); // (255 - 128) / 128 EXPECT_EQ(actual[2], 127.0f / 128.0f); // (255 - 128) / 128
} }
TEST_F(ImageFrameToTensorCalculatorTest, FixedRGBFrameWithRepeatMeanAndStddev) {
runner_ = ::absl::make_unique<CalculatorRunner>(
"ImageFrameToTensorCalculator",
"[mediapipe.ImageFrameToTensorCalculatorOptions.ext]"
"{data_type:DT_FLOAT mean:128.0 mean:128.0 mean:128.0 "
" stddev:128.0 stddev:128.0 stddev:128.0}",
1, 1, 0);
// Create a single pixel image of fixed color #0080ff.
auto image_frame = ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1, 1);
const uint8 color[] = {0, 128, 255};
SetToColor<uint8>(color, image_frame.get());
runner_->MutableInputs()->Index(0).packets.push_back(
Adopt(image_frame.release()).At(Timestamp(0)));
MP_ASSERT_OK(runner_->Run());
const auto& tensor = runner_->Outputs().Index(0).packets[0].Get<tf::Tensor>();
EXPECT_EQ(tensor.dtype(), tf::DT_FLOAT);
ASSERT_EQ(tensor.dims(), 3);
EXPECT_EQ(tensor.shape().dim_size(0), 1);
EXPECT_EQ(tensor.shape().dim_size(1), 1);
EXPECT_EQ(tensor.shape().dim_size(2), 3);
const float* actual = tensor.flat<float>().data();
EXPECT_EQ(actual[0], -1.0f); // ( 0 - 128) / 128
EXPECT_EQ(actual[1], 0.0f); // (128 - 128) / 128
EXPECT_EQ(actual[2], 127.0f / 128.0f); // (255 - 128) / 128
}
} // namespace mediapipe } // namespace mediapipe

View File

@ -70,10 +70,10 @@ const int kNumCoordsPerBox = 4;
// image/understanding/object_detection/export_inference_graph.py // image/understanding/object_detection/export_inference_graph.py
// //
// By default, the output Detections store label ids (integers) for each // By default, the output Detections store label ids (integers) for each
// detection. Optionally, a label map (of the form std::map<int, std::string> // detection. Optionally, a label map (of the form std::map<int, string>
// mapping label ids to label names as strings) can be made available as an // mapping label ids to label names as strings) can be made available as an
// input side packet, in which case the output Detections store // input side packet, in which case the output Detections store
// labels as their associated std::string provided by the label map. // labels as their associated string provided by the label map.
// //
// Usage example: // Usage example:
// node { // node {

View File

@ -59,7 +59,7 @@ namespace mpms = mediapipe::mediasequence;
// bounding boxes from vector<Detections>, and streams with the // bounding boxes from vector<Detections>, and streams with the
// "FLOAT_FEATURE_${NAME}" pattern, which stores the values from vector<float>'s // "FLOAT_FEATURE_${NAME}" pattern, which stores the values from vector<float>'s
// associated with the name ${NAME}. "KEYPOINTS" stores a map of 2D keypoints // associated with the name ${NAME}. "KEYPOINTS" stores a map of 2D keypoints
// from flat_hash_map<std::string, vector<pair<float, float>>>. "IMAGE_${NAME}", // from flat_hash_map<string, vector<pair<float, float>>>. "IMAGE_${NAME}",
// "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store prefixed versions of // "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store prefixed versions of
// each stream, which allows for multiple image streams to be included. However, // each stream, which allows for multiple image streams to be included. However,
// the default names are supported by more tools. // the default names are supported by more tools.

View File

@ -28,7 +28,7 @@
// output_side_packet: "SEQUENCE_EXAMPLE:sequence_example" // output_side_packet: "SEQUENCE_EXAMPLE:sequence_example"
// } // }
// //
// Example converting to std::string in Close(): // Example converting to string in Close():
// node { // node {
// calculator: "StringToSequenceExampleCalculator" // calculator: "StringToSequenceExampleCalculator"
// input_side_packet: "SEQUENCE_EXAMPLE:sequence_example" // input_side_packet: "SEQUENCE_EXAMPLE:sequence_example"

View File

@ -302,10 +302,9 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
<< "To use recurrent_tag_pairs, batch_size must be 1."; << "To use recurrent_tag_pairs, batch_size must be 1.";
for (const auto& tag_pair : options_.recurrent_tag_pair()) { for (const auto& tag_pair : options_.recurrent_tag_pair()) {
const std::vector<std::string> tags = absl::StrSplit(tag_pair, ':'); const std::vector<std::string> tags = absl::StrSplit(tag_pair, ':');
RET_CHECK_EQ(tags.size(), 2) RET_CHECK_EQ(tags.size(), 2) << "recurrent_tag_pair must be a colon "
<< "recurrent_tag_pair must be a colon " "separated string with two components: "
"separated std::string with two components: " << tag_pair;
<< tag_pair;
RET_CHECK(mediapipe::ContainsKey(tag_to_tensor_map_, tags[0])) RET_CHECK(mediapipe::ContainsKey(tag_to_tensor_map_, tags[0]))
<< "Can't find tag '" << tags[0] << "' in signature " << "Can't find tag '" << tags[0] << "' in signature "
<< options_.signature_name(); << options_.signature_name();

View File

@ -86,7 +86,7 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase {
cc->InputSidePackets() cc->InputSidePackets()
.Tag(kStringModelFilePathTag) .Tag(kStringModelFilePathTag)
.Set<std::string>( .Set<std::string>(
// Filename of std::string model. // Filename of string model.
); );
} }
cc->OutputSidePackets() cc->OutputSidePackets()

View File

@ -84,7 +84,7 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator {
} else if (input_side_packets->HasTag(kStringModelFilePathTag)) { } else if (input_side_packets->HasTag(kStringModelFilePathTag)) {
input_side_packets->Tag(kStringModelFilePathTag) input_side_packets->Tag(kStringModelFilePathTag)
.Set<std::string>( .Set<std::string>(
// Filename of std::string model. // Filename of string model.
); );
} }
output_side_packets->Tag(kSessionTag) output_side_packets->Tag(kSessionTag)

View File

@ -69,6 +69,8 @@ const std::string MaybeConvertSignatureToTag(
[](unsigned char c) { return std::toupper(c); }); [](unsigned char c) { return std::toupper(c); });
output = absl::StrReplaceAll(output, {{"/", "_"}}); output = absl::StrReplaceAll(output, {{"/", "_"}});
output = absl::StrReplaceAll(output, {{"-", "_"}}); output = absl::StrReplaceAll(output, {{"-", "_"}});
output = absl::StrReplaceAll(output, {{".", "_"}});
LOG(INFO) << "Renamed TAG from: " << name << " to " << output;
return output; return output;
} else { } else {
return name; return name;
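The change above adds '.' to the set of characters rewritten to '_'. A minimal standalone restatement of the full renaming rule might look like the sketch below; the function name is made up, and the real code additionally logs the rename.

#include <algorithm>
#include <cctype>
#include <string>

#include "absl/strings/str_replace.h"

// Upper-cases the signature name and maps '/', '-' and '.' to '_', mirroring
// the tag-normalization rule in MaybeConvertSignatureToTag.
std::string NormalizeSignatureName(std::string name) {
  std::transform(name.begin(), name.end(), name.begin(),
                 [](unsigned char c) { return std::toupper(c); });
  return absl::StrReplaceAll(name, {{"/", "_"}, {"-", "_"}, {".", "_"}});
}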

View File

@ -71,6 +71,8 @@ const std::string MaybeConvertSignatureToTag(
[](unsigned char c) { return std::toupper(c); }); [](unsigned char c) { return std::toupper(c); });
output = absl::StrReplaceAll(output, {{"/", "_"}}); output = absl::StrReplaceAll(output, {{"/", "_"}});
output = absl::StrReplaceAll(output, {{"-", "_"}}); output = absl::StrReplaceAll(output, {{"-", "_"}});
output = absl::StrReplaceAll(output, {{".", "_"}});
LOG(INFO) << "Renamed TAG from: " << name << " to " << output;
return output; return output;
} else { } else {
return name; return name;

View File

@ -939,7 +939,7 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
#if !defined(MEDIAPIPE_EDGE_TPU) #if !defined(MEDIAPIPE_EDGE_TPU)
if (use_xnnpack) { if (use_xnnpack) {
TfLiteXNNPackDelegateOptions xnnpack_opts{}; auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts); xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts), delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
&TfLiteXNNPackDelegateDelete); &TfLiteXNNPackDelegateDelete);

View File

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <string>
#include <vector> #include <vector>
#include "absl/strings/str_format.h" #include "absl/strings/str_format.h"
@ -558,7 +559,7 @@ uniform ivec2 out_size;
const int output_layer_index = int($1); const int output_layer_index = int($1);
const float combine_with_previous_ratio = float($2); const float combine_with_previous_ratio = float($2);
// Will be replaced with either '#define READ_PREVIOUS' or empty std::string // Will be replaced with either '#define READ_PREVIOUS' or empty string
$3 //DEFINE_READ_PREVIOUS $3 //DEFINE_READ_PREVIOUS
void main() { void main() {

View File

@ -51,6 +51,7 @@ class LocalFilePatternContentsCalculator : public CalculatorBase {
cc->InputSidePackets().Tag(kFileDirectoryTag).Get<std::string>(), cc->InputSidePackets().Tag(kFileDirectoryTag).Get<std::string>(),
cc->InputSidePackets().Tag(kFileSuffixTag).Get<std::string>(), cc->InputSidePackets().Tag(kFileSuffixTag).Get<std::string>(),
&filenames_)); &filenames_));
std::sort(filenames_.begin(), filenames_.end());
return absl::OkStatus(); return absl::OkStatus();
} }

View File

@ -129,8 +129,8 @@ TEST(PacketFrequencyCalculatorTest, MultiPacketTest) {
// Tests packet frequency with multiple input/output streams. // Tests packet frequency with multiple input/output streams.
TEST(PacketFrequencyCalculatorTest, MultiStreamTest) { TEST(PacketFrequencyCalculatorTest, MultiStreamTest) {
// Setup the calculator runner and provide strings as input on all streams // Setup the calculator runner and provide strings as input on all streams
// (note that it doesn't have to be std::string; the calculator can take any // (note that it doesn't have to be string; the calculator can take any type
// type as input). // as input).
CalculatorRunner runner(GetNodeWithMultipleStreams()); CalculatorRunner runner(GetNodeWithMultipleStreams());
// Packet 1 on stream 1. // Packet 1 on stream 1.

View File

@ -37,6 +37,13 @@ RenderAnnotation::Rectangle* NewRect(
annotation->mutable_color()->set_b(options.color().b()); annotation->mutable_color()->set_b(options.color().b());
annotation->set_thickness(options.thickness()); annotation->set_thickness(options.thickness());
if (options.has_top_left_thickness()) {
CHECK(!options.oval());
CHECK(!options.filled());
annotation->mutable_rectangle()->set_top_left_thickness(
options.top_left_thickness());
}
return options.oval() ? options.filled() return options.oval() ? options.filled()
? annotation->mutable_filled_oval() ? annotation->mutable_filled_oval()
->mutable_oval() ->mutable_oval()
@ -136,6 +143,11 @@ absl::Status RectToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0)); cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<RectToRenderDataCalculatorOptions>(); options_ = cc->Options<RectToRenderDataCalculatorOptions>();
if (options_.has_top_left_thickness()) {
// Filled and oval don't support top_left_thickness.
RET_CHECK(!options_.filled());
RET_CHECK(!options_.oval());
}
return absl::OkStatus(); return absl::OkStatus();
} }

View File

@ -35,4 +35,8 @@ message RectToRenderDataCalculatorOptions {
// Whether the rendered rectangle should be an oval. // Whether the rendered rectangle should be an oval.
optional bool oval = 4 [default = false]; optional bool oval = 4 [default = false];
// Radius of top left corner circle. Only supported for oval=false,
// filled=false.
optional double top_left_thickness = 5;
} }
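A brief, hypothetical illustration of configuring this new option in C++, assuming the generated proto class is available; per the checks added above, filled and oval must stay at their default of false when top_left_thickness is set.

RectToRenderDataCalculatorOptions options;
options.set_top_left_thickness(4.0);
// Leave options.filled() and options.oval() at their defaults (false);
// setting either one together with top_left_thickness fails the RET_CHECKs
// added in Open().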

View File

@ -48,8 +48,8 @@ constexpr char kTopKIndexesTag[] = "TOP_K_INDEXES";
constexpr char kScoresTag[] = "SCORES"; constexpr char kScoresTag[] = "SCORES";
// A calculator that takes a vector of scores and returns the indexes, scores, // A calculator that takes a vector of scores and returns the indexes, scores,
// labels of the top k elements, classification protos, and summary std::string // labels of the top k elements, classification protos, and summary string (in
// (in csv format). // csv format).
// //
// Usage example: // Usage example:
// node { // node {
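For intuition, the top-k selection the comment describes can be sketched as below; this is an illustrative snippet, not the calculator's implementation.

#include <algorithm>
#include <numeric>
#include <vector>

// Returns the indexes of the k largest scores, highest score first.
std::vector<int> TopKIndexes(const std::vector<float>& scores, int k) {
  std::vector<int> indexes(scores.size());
  std::iota(indexes.begin(), indexes.end(), 0);
  k = std::min<int>(k, static_cast<int>(indexes.size()));
  std::partial_sort(indexes.begin(), indexes.begin() + k, indexes.end(),
                    [&scores](int a, int b) { return scores[a] > scores[b]; });
  indexes.resize(k);
  return indexes;
}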

View File

@ -76,7 +76,7 @@ constexpr char kTrackingTag[] = "TRACKING";
// IMAGE_SIZE: Input image dimension. // IMAGE_SIZE: Input image dimension.
// TRACKED_BOXES : input box tracking result (proto TimedBoxProtoList) from // TRACKED_BOXES : input box tracking result (proto TimedBoxProtoList) from
// BoxTrackerCalculator. // BoxTrackerCalculator.
// ADD_INDEX: Optional std::string containing binary format proto of type // ADD_INDEX: Optional string containing binary format proto of type
// BoxDetectorIndex. Used for adding target index to the detector // BoxDetectorIndex. Used for adding target index to the detector
// search index during runtime. // search index during runtime.
// CANCEL_OBJECT_ID: Optional id of box to be removed. This is recommended // CANCEL_OBJECT_ID: Optional id of box to be removed. This is recommended
@ -91,8 +91,7 @@ constexpr char kTrackingTag[] = "TRACKING";
// BOXES: Optional output stream of type TimedBoxProtoList for each lost box. // BOXES: Optional output stream of type TimedBoxProtoList for each lost box.
// //
// Input side packets: // Input side packets:
// INDEX_PROTO_STRING: Optional std::string containing binary format proto of // INDEX_PROTO_STRING: Optional string containing binary format proto of type
// type
// BoxDetectorIndex. Used for initializing box_detector // BoxDetectorIndex. Used for initializing box_detector
// with predefined template images. // with predefined template images.
// FRAME_ALIGNMENT: Optional integer to indicate alignment_boundary for // FRAME_ALIGNMENT: Optional integer to indicate alignment_boundary for

View File

@ -15,6 +15,7 @@
#include <stdio.h> #include <stdio.h>
#include <memory> #include <memory>
#include <string>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
@ -78,7 +79,7 @@ const char kOptionsTag[] = "OPTIONS";
// TrackingData and added to current set of tracked boxes. // TrackingData and added to current set of tracked boxes.
// This is recommended to be used with SyncSetInputStreamHandler. // This is recommended to be used with SyncSetInputStreamHandler.
// START_POS_PROTO_STRING: Same as START_POS, but is in the form of serialized // START_POS_PROTO_STRING: Same as START_POS, but is in the form of serialized
// protobuffer std::string. When both START_POS and // protobuffer string. When both START_POS and
// START_POS_PROTO_STRING are present, START_POS is used. Suggest // START_POS_PROTO_STRING are present, START_POS is used. Suggest
// to specify only one of them. // to specify only one of them.
// RESTART_POS: Same as START_POS, but exclusively for receiving detection // RESTART_POS: Same as START_POS, but exclusively for receiving detection
@ -99,7 +100,7 @@ const char kOptionsTag[] = "OPTIONS";
// can be in arbitrary order. // can be in arbitrary order.
// Use with SyncSetInputStreamHandler in streaming mode only. // Use with SyncSetInputStreamHandler in streaming mode only.
// RA_TRACK_PROTO_STRING: Same as RA_TRACK, but is in the form of serialized // RA_TRACK_PROTO_STRING: Same as RA_TRACK, but is in the form of serialized
// protobuffer std::string. When both RA_TRACK and // protobuffer string. When both RA_TRACK and
// RA_TRACK_PROTO_STRING are present, RA_TRACK is used. Suggest // RA_TRACK_PROTO_STRING are present, RA_TRACK is used. Suggest
// to specify only one of them. // to specify only one of them.
// //

View File

@ -15,6 +15,7 @@
#include <cmath> #include <cmath>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <string>
#include "absl/strings/numbers.h" #include "absl/strings/numbers.h"
#include "absl/strings/str_split.h" #include "absl/strings/str_split.h"

View File

@ -79,7 +79,7 @@ ImageFormat::Format GetImageFormat(int num_channels) {
// to be saved, specify an output side packet with tag "SAVED_AUDIO_PATH". // to be saved, specify an output side packet with tag "SAVED_AUDIO_PATH".
// The calculator will call FFmpeg binary to save audio tracks as an aac file. // The calculator will call FFmpeg binary to save audio tracks as an aac file.
// If the audio tracks can't be extracted by FFmpeg, the output side packet // If the audio tracks can't be extracted by FFmpeg, the output side packet
// will contain an empty std::string. // will contain an empty string.
// //
// Example config: // Example config:
// node { // node {

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
zipStoreBase=GRADLE_USER_HOME zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists zipStorePath=wrapper/dists

View File

@ -10,6 +10,9 @@
<uses-permission android:name="android.permission.CAMERA" /> <uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" /> <uses-feature android:name="android.hardware.camera" />
<!-- For profiling -->
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<application <application
android:allowBackup="true" android:allowBackup="true"
android:icon="@mipmap/ic_launcher" android:icon="@mipmap/ic_launcher"

View File

@ -40,6 +40,7 @@ android_binary(
"//mediapipe/modules/face_detection:face_detection_short_range.tflite", "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite", "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt", "//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/holistic_landmark:hand_recrop.tflite", "//mediapipe/modules/holistic_landmark:hand_recrop.tflite",
"//mediapipe/modules/pose_detection:pose_detection.tflite", "//mediapipe/modules/pose_detection:pose_detection.tflite",

View File

@ -80,6 +80,7 @@ cc_library(
"//mediapipe/framework/formats:location_data_cc_proto", "//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status", "//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
], ],
alwayslink = 1, alwayslink = 1,
) )

View File

@ -15,6 +15,7 @@
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include "absl/status/status.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h" #include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.pb.h" #include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_state.h" #include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_state.h"
@ -41,6 +42,7 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
// Can be used to control whether an animated zoom should actually be performed // Can be used to control whether an animated zoom should actually be performed
// (configured through option us_to_first_rect). If provided, a non-zero integer // (configured through option us_to_first_rect). If provided, a non-zero integer
// will allow the animated zoom to be used when the first detections arrive. // will allow the animated zoom to be used when the first detections arrive.
// Applies to first detection only.
constexpr char kAnimateZoom[] = "ANIMATE_ZOOM"; constexpr char kAnimateZoom[] = "ANIMATE_ZOOM";
// Can be used to control the maximum zoom; note that it is re-evaluated only // Can be used to control the maximum zoom; note that it is re-evaluated only
// upon change of input resolution. A value of 100 disables zooming and is the // upon change of input resolution. A value of 100 disables zooming and is the
@ -112,6 +114,16 @@ class ContentZoomingCalculator : public CalculatorBase {
int* pan_offset, int* height); int* pan_offset, int* height);
// Sets max_frame_value_ and target_aspect_ // Sets max_frame_value_ and target_aspect_
absl::Status UpdateAspectAndMax(); absl::Status UpdateAspectAndMax();
// Smooth camera path
absl::Status SmoothAndClampPath(int target_width, int target_height,
float path_width, float path_height,
float* path_offset_x, float* path_offset_y);
// Compute box containing all detections.
absl::Status GetDetectionsBox(mediapipe::CalculatorContext* cc, float* xmin,
float* xmax, float* ymin, float* ymax,
bool* only_required_found,
bool* has_detections);
ContentZoomingCalculatorOptions options_; ContentZoomingCalculatorOptions options_;
// Detection frame width/height. // Detection frame width/height.
int frame_height_; int frame_height_;
@ -537,68 +549,13 @@ absl::Status ContentZoomingCalculator::Process(
UpdateForResolutionChange(cc, frame_width, frame_height)); UpdateForResolutionChange(cc, frame_width, frame_height));
} }
bool only_required_found = false;
// Compute the box that contains all "is_required" detections. // Compute the box that contains all "is_required" detections.
float xmin = 1, ymin = 1, xmax = 0, ymax = 0; float xmin = 1, ymin = 1, xmax = 0, ymax = 0;
if (cc->Inputs().HasTag(kSalientRegions)) { bool only_required_found = false;
auto detection_set = cc->Inputs().Tag(kSalientRegions).Get<DetectionSet>(); bool has_detections = true;
for (const auto& region : detection_set.detections()) { MP_RETURN_IF_ERROR(GetDetectionsBox(cc, &xmin, &xmax, &ymin, &ymax,
if (!region.only_required()) { &only_required_found, &has_detections));
continue; if (!has_detections) return absl::OkStatus();
}
only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
region, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax));
}
}
if (cc->Inputs().HasTag(kDetections)) {
if (cc->Inputs().Tag(kDetections).IsEmpty()) {
if (last_only_required_detection_ == 0) {
// If no detections are available and we never had any,
// simply return the full-image rectangle as crop-rect.
if (cc->Outputs().HasTag(kCropRect)) {
auto default_rect = absl::make_unique<mediapipe::Rect>();
default_rect->set_x_center(frame_width_ / 2);
default_rect->set_y_center(frame_height_ / 2);
default_rect->set_width(frame_width_);
default_rect->set_height(frame_height_);
cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
Timestamp(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
default_rect->set_x_center(0.5);
default_rect->set_y_center(0.5);
default_rect->set_width(1.0);
default_rect->set_height(1.0);
cc->Outputs()
.Tag(kNormalizedCropRect)
.Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
}
// Also provide a first crop rect: in this case a zero-sized one.
if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs()
.Tag(kFirstCropRect)
.Add(new mediapipe::NormalizedRect(),
Timestamp(cc->InputTimestamp()));
}
return absl::OkStatus();
}
} else {
auto raw_detections = cc->Inputs()
.Tag(kDetections)
.Get<std::vector<mediapipe::Detection>>();
for (const auto& detection : raw_detections) {
only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
detection, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax));
}
}
}
const bool may_start_animation = (options_.us_to_first_rect() != 0) && const bool may_start_animation = (options_.us_to_first_rect() != 0) &&
(!cc->Inputs().HasTag(kAnimateZoom) || (!cc->Inputs().HasTag(kAnimateZoom) ||
@ -656,7 +613,8 @@ absl::Status ContentZoomingCalculator::Process(
path_solver_zoom_->ClearHistory(); path_solver_zoom_->ClearHistory();
} }
const bool camera_active = const bool camera_active =
is_animating || pan_state || tilt_state || zoom_state; is_animating || ((pan_state || tilt_state || zoom_state) &&
!options_.disable_animations());
// Waiting for first rect before setting any value of the camera active flag // Waiting for first rect before setting any value of the camera active flag
// so we avoid setting it to false during initialization. // so we avoid setting it to false during initialization.
if (cc->Outputs().HasTag(kCameraActive) && if (cc->Outputs().HasTag(kCameraActive) &&
@ -666,17 +624,26 @@ absl::Status ContentZoomingCalculator::Process(
.AddPacket(MakePacket<bool>(camera_active).At(cc->InputTimestamp())); .AddPacket(MakePacket<bool>(camera_active).At(cc->InputTimestamp()));
} }
// Skip the path solvers to the final destination if not animating.
const bool disable_animations =
options_.disable_animations() && path_solver_zoom_->IsInitialized();
if (disable_animations) {
MP_RETURN_IF_ERROR(path_solver_zoom_->SetState(height));
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(offset_y));
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(offset_x));
}
// Compute smoothed zoom camera path. // Compute smoothed zoom camera path.
MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation( MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation(
height, cc->InputTimestamp().Microseconds())); height, cc->InputTimestamp().Microseconds()));
float path_height; float path_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height)); MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height));
float path_width = path_height * target_aspect_; const float path_width = path_height * target_aspect_;
// Update pixel-per-degree value for pan/tilt. // Update pixel-per-degree value for pan/tilt.
int target_height; int target_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetTargetPosition(&target_height)); MP_RETURN_IF_ERROR(path_solver_zoom_->GetTargetPosition(&target_height));
int target_width = target_height * target_aspect_; const int target_width = target_height * target_aspect_;
MP_RETURN_IF_ERROR(path_solver_pan_->UpdatePixelsPerDegree( MP_RETURN_IF_ERROR(path_solver_pan_->UpdatePixelsPerDegree(
static_cast<float>(target_width) / kFieldOfView)); static_cast<float>(target_width) / kFieldOfView));
MP_RETURN_IF_ERROR(path_solver_tilt_->UpdatePixelsPerDegree( MP_RETURN_IF_ERROR(path_solver_tilt_->UpdatePixelsPerDegree(
@ -692,66 +659,16 @@ absl::Status ContentZoomingCalculator::Process(
float path_offset_y; float path_offset_y;
MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y)); MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y));
float delta_height; // Update path.
MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height)); MP_RETURN_IF_ERROR(SmoothAndClampPath(target_width, target_height, path_width,
int delta_width = delta_height * target_aspect_; path_height, &path_offset_x,
&path_offset_y));
// Smooth centering when zooming out.
float remaining_width = target_width - path_width;
int width_space = frame_width_ - target_width;
if (abs(path_offset_x - frame_width_ / 2) >
width_space / 2 + kPixelTolerance &&
remaining_width > kPixelTolerance) {
float required_width =
abs(path_offset_x - frame_width_ / 2) - width_space / 2;
if (path_offset_x < frame_width_ / 2) {
path_offset_x += delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
} else {
path_offset_x -= delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
}
}
float remaining_height = target_height - path_height;
int height_space = frame_height_ - target_height;
if (abs(path_offset_y - frame_height_ / 2) >
height_space / 2 + kPixelTolerance &&
remaining_height > kPixelTolerance) {
float required_height =
abs(path_offset_y - frame_height_ / 2) - height_space / 2;
if (path_offset_y < frame_height_ / 2) {
path_offset_y += delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
} else {
path_offset_y -= delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
}
}
// Prevent box from extending beyond the image after camera smoothing.
if (path_offset_y - ceil(path_height / 2.0) < 0) {
path_offset_y = ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
} else if (path_offset_y + ceil(path_height / 2.0) > frame_height_) {
path_offset_y = frame_height_ - ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
}
if (path_offset_x - ceil(path_width / 2.0) < 0) {
path_offset_x = ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
} else if (path_offset_x + ceil(path_width / 2.0) > frame_width_) {
path_offset_x = frame_width_ - ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
}
// Convert to top/bottom borders to remove.
int path_top = path_offset_y - path_height / 2;
int path_bottom = frame_height_ - (path_offset_y + path_height / 2);
// Transmit result downstream to scenecroppingcalculator. // Transmit result downstream to scenecroppingcalculator.
if (cc->Outputs().HasTag(kDetectedBorders)) { if (cc->Outputs().HasTag(kDetectedBorders)) {
// Convert to top/bottom borders to remove.
const int path_top = path_offset_y - path_height / 2;
const int path_bottom = frame_height_ - (path_offset_y + path_height / 2);
std::unique_ptr<StaticFeatures> features = std::unique_ptr<StaticFeatures> features =
absl::make_unique<StaticFeatures>(); absl::make_unique<StaticFeatures>();
MakeStaticFeatures(path_top, path_bottom, frame_width_, frame_height_, MakeStaticFeatures(path_top, path_bottom, frame_width_, frame_height_,
@ -798,8 +715,8 @@ absl::Status ContentZoomingCalculator::Process(
if (cc->Outputs().HasTag(kNormalizedCropRect)) { if (cc->Outputs().HasTag(kNormalizedCropRect)) {
std::unique_ptr<mediapipe::NormalizedRect> gpu_rect = std::unique_ptr<mediapipe::NormalizedRect> gpu_rect =
absl::make_unique<mediapipe::NormalizedRect>(); absl::make_unique<mediapipe::NormalizedRect>();
float float_frame_width = static_cast<float>(frame_width_); const float float_frame_width = static_cast<float>(frame_width_);
float float_frame_height = static_cast<float>(frame_height_); const float float_frame_height = static_cast<float>(frame_height_);
if (is_animating) { if (is_animating) {
auto rect = auto rect =
GetAnimationRect(frame_width, frame_height, cc->InputTimestamp()); GetAnimationRect(frame_width, frame_height, cc->InputTimestamp());
@ -829,5 +746,130 @@ absl::Status ContentZoomingCalculator::Process(
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status ContentZoomingCalculator::SmoothAndClampPath(
int target_width, int target_height, float path_width, float path_height,
float* path_offset_x, float* path_offset_y) {
float delta_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
const int delta_width = delta_height * target_aspect_;
// Smooth centering when zooming out.
const float remaining_width = target_width - path_width;
const int width_space = frame_width_ - target_width;
if (abs(*path_offset_x - frame_width_ / 2) >
width_space / 2 + kPixelTolerance &&
remaining_width > kPixelTolerance) {
const float required_width =
abs(*path_offset_x - frame_width_ / 2) - width_space / 2;
if (*path_offset_x < frame_width_ / 2) {
*path_offset_x += delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
} else {
*path_offset_x -= delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
}
}
const float remaining_height = target_height - path_height;
const int height_space = frame_height_ - target_height;
if (abs(*path_offset_y - frame_height_ / 2) >
height_space / 2 + kPixelTolerance &&
remaining_height > kPixelTolerance) {
const float required_height =
abs(*path_offset_y - frame_height_ / 2) - height_space / 2;
if (*path_offset_y < frame_height_ / 2) {
*path_offset_y += delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
} else {
*path_offset_y -= delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
}
}
// Prevent box from extending beyond the image after camera smoothing.
if (*path_offset_y - ceil(path_height / 2.0) < 0) {
*path_offset_y = ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
} else if (*path_offset_y + ceil(path_height / 2.0) > frame_height_) {
*path_offset_y = frame_height_ - ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
}
if (*path_offset_x - ceil(path_width / 2.0) < 0) {
*path_offset_x = ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
} else if (*path_offset_x + ceil(path_width / 2.0) > frame_width_) {
*path_offset_x = frame_width_ - ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
}
return absl::OkStatus();
}
absl::Status ContentZoomingCalculator::GetDetectionsBox(
mediapipe::CalculatorContext* cc, float* xmin, float* xmax, float* ymin,
float* ymax, bool* only_required_found, bool* has_detections) {
if (cc->Inputs().HasTag(kSalientRegions)) {
auto detection_set = cc->Inputs().Tag(kSalientRegions).Get<DetectionSet>();
for (const auto& region : detection_set.detections()) {
if (!region.only_required()) {
continue;
}
*only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
region, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), xmin, xmax, ymin, ymax));
}
}
if (cc->Inputs().HasTag(kDetections)) {
if (cc->Inputs().Tag(kDetections).IsEmpty()) {
if (last_only_required_detection_ == 0) {
// If no detections are available and we never had any,
// simply return the full-image rectangle as crop-rect.
if (cc->Outputs().HasTag(kCropRect)) {
auto default_rect = absl::make_unique<mediapipe::Rect>();
default_rect->set_x_center(frame_width_ / 2);
default_rect->set_y_center(frame_height_ / 2);
default_rect->set_width(frame_width_);
default_rect->set_height(frame_height_);
cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
Timestamp(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
default_rect->set_x_center(0.5);
default_rect->set_y_center(0.5);
default_rect->set_width(1.0);
default_rect->set_height(1.0);
cc->Outputs()
.Tag(kNormalizedCropRect)
.Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
}
// Also provide a first crop rect: in this case a zero-sized one.
if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs()
.Tag(kFirstCropRect)
.Add(new mediapipe::NormalizedRect(),
Timestamp(cc->InputTimestamp()));
}
*has_detections = false;
return absl::OkStatus();
}
} else {
auto raw_detections = cc->Inputs()
.Tag(kDetections)
.Get<std::vector<mediapipe::Detection>>();
for (const auto& detection : raw_detections) {
*only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
detection, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), xmin, xmax, ymin, ymax));
}
}
}
return absl::OkStatus();
}
} // namespace autoflip } // namespace autoflip
} // namespace mediapipe } // namespace mediapipe

View File

@ -19,7 +19,7 @@ package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto"; import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto";
import "mediapipe/framework/calculator.proto"; import "mediapipe/framework/calculator.proto";
// NextTag: 18 // NextTag: 19
message ContentZoomingCalculatorOptions { message ContentZoomingCalculatorOptions {
extend mediapipe.CalculatorOptions { extend mediapipe.CalculatorOptions {
optional ContentZoomingCalculatorOptions ext = 313091992; optional ContentZoomingCalculatorOptions ext = 313091992;
@ -71,6 +71,12 @@ message ContentZoomingCalculatorOptions {
// us_to_first_rect time budget. // us_to_first_rect time budget.
optional int64 us_to_first_rect_delay = 16 [default = 0]; optional int64 us_to_first_rect_delay = 16 [default = 0];
// When true, this flag disables animating camera motions,
// and cuts directly to final target position.
// Does not apply to the first instance (first detection will still animate).
// Use "ANIMATE_ZOOM" input stream to control the first animation.
optional bool disable_animations = 18;
// Deprecated parameters // Deprecated parameters
optional KinematicOptions kinematic_options = 2 [deprecated = true]; optional KinematicOptions kinematic_options = 2 [deprecated = true];
optional int64 min_motion_to_reframe = 4 [deprecated = true]; optional int64 min_motion_to_reframe = 4 [deprecated = true];

View File

@ -56,7 +56,7 @@ constexpr char kRegionsTag[] = "REGIONS";
constexpr char kDetectionsTag[] = "DETECTIONS"; constexpr char kDetectionsTag[] = "DETECTIONS";
// Converts an object detection to an autoflip SignalType. Returns true if the // Converts an object detection to an autoflip SignalType. Returns true if the
// std::string label has an autoflip label. // string label has an autoflip label.
bool MatchType(const std::string& label, SignalType* type) { bool MatchType(const std::string& label, SignalType* type) {
if (label == "person") { if (label == "person") {
type->set_standard(SignalType::HUMAN); type->set_standard(SignalType::HUMAN);

View File

@ -182,7 +182,7 @@ namespace {
absl::Status ParseAspectRatioString(const std::string& aspect_ratio_string, absl::Status ParseAspectRatioString(const std::string& aspect_ratio_string,
double* aspect_ratio) { double* aspect_ratio) {
std::string error_msg = std::string error_msg =
"Aspect ratio std::string must be in the format of 'width:height', e.g. " "Aspect ratio string must be in the format of 'width:height', e.g. "
"'1:1' or '5:4', your input was " + "'1:1' or '5:4', your input was " +
aspect_ratio_string; aspect_ratio_string;
auto pos = aspect_ratio_string.find(':'); auto pos = aspect_ratio_string.find(':');
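The "width:height" format mentioned in the error message can be parsed roughly as in the sketch below; this is a simplified, hypothetical variant of ParseAspectRatioString that returns a bool instead of a status with a detailed message.

#include <string>

#include "absl/strings/numbers.h"

// Parses "width:height" (e.g. "16:9") into width / height.
bool ParseAspectRatio(const std::string& s, double* aspect_ratio) {
  const auto pos = s.find(':');
  if (pos == std::string::npos) return false;
  double width = 0, height = 0;
  if (!absl::SimpleAtod(s.substr(0, pos), &width) ||
      !absl::SimpleAtod(s.substr(pos + 1), &height) || height <= 0) {
    return false;
  }
  *aspect_ratio = width / height;
  return true;
}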

View File

@ -4,6 +4,7 @@ constexpr float kMinVelocity = 0.5;
namespace mediapipe { namespace mediapipe {
namespace autoflip { namespace autoflip {
namespace { namespace {
int Median(const std::deque<std::pair<uint64, int>>& positions_raw) { int Median(const std::deque<std::pair<uint64, int>>& positions_raw) {
std::deque<int> positions; std::deque<int> positions;
@ -16,6 +17,7 @@ int Median(const std::deque<std::pair<uint64, int>>& positions_raw) {
return positions[n]; return positions[n];
} }
} // namespace } // namespace
bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) { bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) {
if (options_.has_min_motion_to_reframe()) { if (options_.has_min_motion_to_reframe()) {
return abs(delta_degs) < options_.min_motion_to_reframe(); return abs(delta_degs) < options_.min_motion_to_reframe();
@ -25,7 +27,9 @@ bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) {
return abs(delta_degs) < options_.min_motion_to_reframe_lower(); return abs(delta_degs) < options_.min_motion_to_reframe_lower();
} }
} }
void KinematicPathSolver::ClearHistory() { raw_positions_at_time_.clear(); } void KinematicPathSolver::ClearHistory() { raw_positions_at_time_.clear(); }
absl::Status KinematicPathSolver::PredictMotionState(int position, absl::Status KinematicPathSolver::PredictMotionState(int position,
const uint64 time_us, const uint64 time_us,
bool* state) { bool* state) {
@ -48,6 +52,9 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
} }
int filtered_position = Median(raw_positions_at_time_copy); int filtered_position = Median(raw_positions_at_time_copy);
filtered_position =
std::clamp(filtered_position, min_location_, max_location_);
double delta_degs = double delta_degs =
(filtered_position - current_position_px_) / pixels_per_degree_; (filtered_position - current_position_px_) / pixels_per_degree_;
@ -59,6 +66,9 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
// If the motion is smaller than the reframe_window and camera is moving, // If the motion is smaller than the reframe_window and camera is moving,
// don't use the update. // don't use the update.
*state = false; *state = false;
} else if (prior_position_px_ == current_position_px_ && motion_state_) {
// Camera isn't actually moving. Likely face is past bounds.
*state = false;
} else { } else {
// Apply new position, plus the reframe window size. // Apply new position, plus the reframe window size.
*state = true; *state = true;
@ -66,6 +76,7 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status KinematicPathSolver::AddObservation(int position, absl::Status KinematicPathSolver::AddObservation(int position,
const uint64 time_us) { const uint64 time_us) {
if (!initialized_) { if (!initialized_) {
@ -181,18 +192,22 @@ absl::Status KinematicPathSolver::AddObservation(int position,
} }
// Time and position updates. // Time and position updates.
double delta_t = (time_us - current_time_) / 1000000.0; double delta_t_sec = (time_us - current_time_) / 1000000.0;
if (options_.max_delta_time_sec() > 0) {
// If updates are very infrequent, then limit the max time difference.
delta_t_sec = fmin(delta_t_sec, options_.max_delta_time_sec());
}
// Time since last state/prediction update, smoothed by // Time since last state/prediction update, smoothed by
// mean_period_update_rate. // mean_period_update_rate.
if (mean_delta_t_ < 0) { if (mean_delta_t_ < 0) {
mean_delta_t_ = delta_t; mean_delta_t_ = delta_t_sec;
} else { } else {
mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) + mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
delta_t * options_.mean_period_update_rate(); delta_t_sec * options_.mean_period_update_rate();
} }
// Observed velocity and then weighted update of this velocity. // Observed velocity and then weighted update of this velocity (deg/sec).
double observed_velocity = delta_degs / delta_t; double observed_velocity = delta_degs / delta_t_sec;
double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(), double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(),
options_.max_update_rate()); options_.max_update_rate());
double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) + double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
@ -253,7 +268,8 @@ absl::Status KinematicPathSolver::GetDeltaState(float* delta_position) {
absl::Status KinematicPathSolver::SetState(const float position) { absl::Status KinematicPathSolver::SetState(const float position) {
RET_CHECK(initialized_) << "SetState called before first observation added."; RET_CHECK(initialized_) << "SetState called before first observation added.";
current_position_px_ = position; current_position_px_ = std::clamp(position, static_cast<float>(min_location_),
static_cast<float>(max_location_));
return absl::OkStatus(); return absl::OkStatus();
} }

View File

@ -71,6 +71,8 @@ class KinematicPathSolver {
// Provides the change in position from last state. // Provides the change in position from last state.
absl::Status GetDeltaState(float* delta_position); absl::Status GetDeltaState(float* delta_position);
bool IsInitialized() { return initialized_; }
private: private:
// Tuning options. // Tuning options.
KinematicOptions options_; KinematicOptions options_;

View File

@ -31,6 +31,9 @@ message KinematicOptions {
optional int64 filtering_time_window_us = 7 [default = 0]; optional int64 filtering_time_window_us = 7 [default = 0];
// Weighted update of average period, used for motion updates. // Weighted update of average period, used for motion updates.
optional float mean_period_update_rate = 8 [default = 0.25]; optional float mean_period_update_rate = 8 [default = 0.25];
// When set, caps the maximum time difference (seconds) calculated between new
// updates/observations. Useful when updates come very infrequently.
optional double max_delta_time_sec = 13;
// Scale factor for max velocity, to be multiplied by the distance from center // Scale factor for max velocity, to be multiplied by the distance from center
// in degrees. Cannot be used with max_velocity and must be used with // in degrees. Cannot be used with max_velocity and must be used with
// max_velocity_shift. // max_velocity_shift.

View File

@ -419,6 +419,13 @@ TEST(KinematicPathSolverTest, PassSetPosition) {
MP_ASSERT_OK(solver.SetState(400)); MP_ASSERT_OK(solver.SetState(400));
MP_ASSERT_OK(solver.GetState(&state)); MP_ASSERT_OK(solver.GetState(&state));
EXPECT_FLOAT_EQ(state, 400); EXPECT_FLOAT_EQ(state, 400);
// Expect to stay in bounds.
MP_ASSERT_OK(solver.SetState(600));
MP_ASSERT_OK(solver.GetState(&state));
EXPECT_FLOAT_EQ(state, 500);
MP_ASSERT_OK(solver.SetState(-100));
MP_ASSERT_OK(solver.GetState(&state));
EXPECT_FLOAT_EQ(state, 0);
} }
TEST(KinematicPathSolverTest, PassBorderTest) { TEST(KinematicPathSolverTest, PassBorderTest) {
KinematicOptions options; KinematicOptions options;

View File

@ -83,7 +83,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem(
const double in, const double out, Problem* problem, double* a, double* b, const double in, const double out, Problem* problem, double* a, double* b,
double* c, double* d, double* k) { double* c, double* d, double* k) {
// Creating a cost function, with 1D residual and 5 1D parameter blocks. This // Creating a cost function, with 1D residual and 5 1D parameter blocks. This
// is what the "1, 1, 1, 1, 1, 1" std::string below means. // is what the "1, 1, 1, 1, 1, 1" string below means.
CostFunction* cost_function = CostFunction* cost_function =
new AutoDiffCostFunction<PolynomialResidual, 1, 1, 1, 1, 1, 1>( new AutoDiffCostFunction<PolynomialResidual, 1, 1, 1, 1, 1, 1>(
new PolynomialResidual(in, out)); new PolynomialResidual(in, out));
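For readers unfamiliar with Ceres, the "1, 1, 1, 1, 1, 1" template arguments denote one 1-D residual followed by five 1-D parameter blocks (a, b, c, d, k). The sketch below shows the typical wiring; the residual body is a stand-in polynomial, not the file's actual PolynomialResidual.

#include "ceres/ceres.h"

// Stand-in functor: residual = a*x + b*x^2 + c*x^3 + d*x^4 + k - out.
struct ExamplePolynomialResidual {
  ExamplePolynomialResidual(double in, double out) : in_(in), out_(out) {}
  template <typename T>
  bool operator()(const T* const a, const T* const b, const T* const c,
                  const T* const d, const T* const k, T* residual) const {
    const double x = in_;
    residual[0] = a[0] * x + b[0] * x * x + c[0] * x * x * x +
                  d[0] * x * x * x * x + k[0] - out_;
    return true;
  }
  const double in_;
  const double out_;
};

void AddSampleToProblem(double in, double out, ceres::Problem* problem,
                        double* a, double* b, double* c, double* d, double* k) {
  ceres::CostFunction* cost_function =
      new ceres::AutoDiffCostFunction<ExamplePolynomialResidual, 1, 1, 1, 1, 1,
                                      1>(new ExamplePolynomialResidual(in, out));
  problem->AddResidualBlock(cost_function, /*loss_function=*/nullptr, a, b, c,
                            d, k);
}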

View File

@ -55,7 +55,8 @@ class SceneCameraMotionAnalyzer {
scene_camera_motion_analyzer_options) scene_camera_motion_analyzer_options)
: options_(scene_camera_motion_analyzer_options), : options_(scene_camera_motion_analyzer_options),
time_since_last_salient_region_us_(0), time_since_last_salient_region_us_(0),
has_solid_color_background_(false) {} has_solid_color_background_(false),
total_scene_frames_(0) {}
~SceneCameraMotionAnalyzer() {} ~SceneCameraMotionAnalyzer() {}

View File

@ -44,7 +44,7 @@ absl::Status PrintHelloWorld() {
ASSIGN_OR_RETURN(OutputStreamPoller poller, ASSIGN_OR_RETURN(OutputStreamPoller poller,
graph.AddOutputStreamPoller("out")); graph.AddOutputStreamPoller("out"));
MP_RETURN_IF_ERROR(graph.StartRun({})); MP_RETURN_IF_ERROR(graph.StartRun({}));
// Give 10 input packets that contains the same std::string "Hello World!". // Give 10 input packets that contains the same string "Hello World!".
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
MP_RETURN_IF_ERROR(graph.AddPacketToInputStream( MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
"in", MakePacket<std::string>("Hello World!").At(Timestamp(i)))); "in", MakePacket<std::string>("Hello World!").At(Timestamp(i))));
@ -52,7 +52,7 @@ absl::Status PrintHelloWorld() {
// Close the input stream "in". // Close the input stream "in".
MP_RETURN_IF_ERROR(graph.CloseInputStream("in")); MP_RETURN_IF_ERROR(graph.CloseInputStream("in"));
mediapipe::Packet packet; mediapipe::Packet packet;
// Get the output packets std::string. // Get the output packets string.
while (poller.Next(&packet)) { while (poller.Next(&packet)) {
LOG(INFO) << packet.Get<std::string>(); LOG(INFO) << packet.Get<std::string>();
} }

View File

@ -72,6 +72,7 @@ objc_library(
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb", "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/face_landmark:face_landmark.tflite",
], ],
features = ["-layering_check"],
sdk_frameworks = [ sdk_frameworks = [
"AVFoundation", "AVFoundation",
"CoreGraphics", "CoreGraphics",

View File

@ -58,6 +58,7 @@ objc_library(
"//mediapipe/modules/face_detection:face_detection_short_range.tflite", "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite", "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt", "//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/holistic_landmark:hand_recrop.tflite", "//mediapipe/modules/holistic_landmark:hand_recrop.tflite",
"//mediapipe/modules/pose_detection:pose_detection.tflite", "//mediapipe/modules/pose_detection:pose_detection.tflite",

View File

@ -150,6 +150,13 @@ mediapipe_proto_library(
deps = ["//mediapipe/framework:mediapipe_options_proto"], deps = ["//mediapipe/framework:mediapipe_options_proto"],
) )
config_setting(
name = "android_no_jni",
define_values = {"MEDIAPIPE_NO_JNI": "1"},
values = {"crosstool_top": "//external:android/crosstool"},
visibility = ["//visibility:public"],
)
cc_library( cc_library(
name = "calculator_base", name = "calculator_base",
srcs = ["calculator_base.cc"], srcs = ["calculator_base.cc"],
@ -712,6 +719,7 @@ cc_library(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
"@com_google_absl//absl/memory", "@com_google_absl//absl/memory",
"@com_google_absl//absl/synchronization",
], ],
) )
@ -916,15 +924,19 @@ cc_library(
":packet", ":packet",
":packet_set", ":packet_set",
":type_map", ":type_map",
"//mediapipe/framework/deps:no_destructor",
"//mediapipe/framework/port:logging", "//mediapipe/framework/port:logging",
"//mediapipe/framework/port:map_util", "//mediapipe/framework/port:map_util",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:source_location", "//mediapipe/framework/port:source_location",
"//mediapipe/framework/port:status", "//mediapipe/framework/port:status",
"//mediapipe/framework/tool:status_util", "//mediapipe/framework/tool:status_util",
"//mediapipe/framework/tool:type_util",
"//mediapipe/framework/tool:validate_name", "//mediapipe/framework/tool:validate_name",
"@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
"@com_google_absl//absl/types:variant",
], ],
) )

View File

@ -134,6 +134,7 @@ cc_test(
deps = [ deps = [
":packet", ":packet",
"//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:gtest_main",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
], ],
) )

View File

@ -313,8 +313,8 @@ template <class Calc>
class Node : public NodeBase { class Node : public NodeBase {
public: public:
Node() : NodeBase(Calc::kCalculatorName) {} Node() : NodeBase(Calc::kCalculatorName) {}
// Overrides the built-in calculator type std::string with the provided // Overrides the built-in calculator type string with the provided argument.
// argument. Can be used to create nodes from pure interfaces. // Can be used to create nodes from pure interfaces.
// TODO: only use this for pure interfaces // TODO: only use this for pure interfaces
Node(const std::string& type_override) : NodeBase(type_override) {} Node(const std::string& type_override) : NodeBase(type_override) {}
@ -377,6 +377,29 @@ class PacketGenerator {
return *options_.MutableExtension(T::ext); return *options_.MutableExtension(T::ext);
} }
template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
using PayloadT =
typename PortCommon<B, T, kIsOptional, kIsMultiple>::PayloadT;
if constexpr (std::is_same_v<B, SideOutputBase>) {
auto* base = &out_sides_[port.Tag()];
if constexpr (kIsMultiple) {
return MultiSideSource<PayloadT>(base);
} else {
return SideSource<PayloadT>(base);
}
} else if constexpr (std::is_same_v<B, SideInputBase>) {
auto* base = &in_sides_[port.Tag()];
if constexpr (kIsMultiple) {
return MultiSideDestination<PayloadT>(base);
} else {
return SideDestination<PayloadT>(base);
}
} else {
static_assert(dependent_false<B>::value, "Type not supported.");
}
}
private: private:
std::string type_; std::string type_;
TagIndexMap<DestinationBase> in_sides_; TagIndexMap<DestinationBase> in_sides_;
@ -402,7 +425,7 @@ class Graph {
} }
// Creates a node of a specific type. Should be used for pure interfaces, // Creates a node of a specific type. Should be used for pure interfaces,
// which do not have a built-in type std::string. // which do not have a built-in type string.
template <class Calc> template <class Calc>
Node<Calc>& AddNode(const std::string& type) { Node<Calc>& AddNode(const std::string& type) {
auto node = std::make_unique<Node<Calc>>(type); auto node = std::make_unique<Node<Calc>>(type);

View File

@ -6,8 +6,8 @@
namespace mediapipe { namespace mediapipe {
namespace api2 { namespace api2 {
// This class stores a constant std::string that can be inspected at compile // This class stores a constant string that can be inspected at compile time
// time in constexpr code. // in constexpr code.
class const_str { class const_str {
public: public:
constexpr const_str(std::size_t size, const char* data) constexpr const_str(std::size_t size, const char* data)
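A tiny hypothetical usage sketch, assuming the const_str class above is in scope; the identifiers kTagChars and kTag are invented for illustration.

constexpr char kTagChars[] = "IMAGE";
// sizeof includes the trailing '\0', so subtract one for the length.
constexpr const_str kTag(sizeof(kTagChars) - 1, kTagChars);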

Some files were not shown because too many files have changed in this diff.