Project import generated by Copybara.
GitOrigin-RevId: 73d686c40057684f8bfaca285368bf1813f9fc26
Parent: e6c19885c6
Commit: cc6a2f7af6
@@ -1 +1 @@
-4.2.1
+5.0.0
.github/ISSUE_TEMPLATE/50-other-issues.md (2 changes, vendored)

@@ -10,5 +10,3 @@ For questions on how to work with MediaPipe, or support for problems that are no
 If you are reporting a vulnerability, please use the [dedicated reporting process](https://github.com/google/mediapipe/security).
 
-For high-level discussions about MediaPipe, please post to discuss@mediapipe.org, for questions about the development or internal workings of MediaPipe, or if you would like to know how to contribute to MediaPipe, please post to developers@mediapipe.org.
-
@@ -56,7 +56,7 @@ RUN pip3 install tf_slim
 RUN ln -s /usr/bin/python3 /usr/bin/python
 
 # Install bazel
-ARG BAZEL_VERSION=4.2.1
+ARG BAZEL_VERSION=5.0.0
 RUN mkdir /bazel && \
     wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
 azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
@@ -136,8 +136,8 @@ run code search using
 
 ## Community
 
-*   [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome
-    MediaPipe related frameworks, libraries and software
+*   [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
+    curated list of awesome MediaPipe related frameworks, libraries and software
 *   [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
 *   [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
     community discussion around MediaPipe
WORKSPACE (14 changes)

@@ -61,11 +61,12 @@ http_archive(
     sha256 = "de682ea824bfffba05b4e33b67431c247397d6175962534305136aa06f92e049",
 )
 
-# Google Benchmark library.
+# Google Benchmark library v1.6.1 released on 2022-01-10.
 http_archive(
     name = "com_google_benchmark",
-    urls = ["https://github.com/google/benchmark/archive/main.zip"],
-    strip_prefix = "benchmark-main",
+    urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.6.1.tar.gz"],
+    strip_prefix = "benchmark-1.6.1",
+    sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4",
     build_file = "@//third_party:benchmark.BUILD",
 )
 

@@ -373,9 +374,9 @@ http_archive(
 )
 
 # Tensorflow repo should always go after the other external dependencies.
-# 2021-12-02
-_TENSORFLOW_GIT_COMMIT = "18a1dc0ba806dc023808531f0373d9ec068e64bf"
-_TENSORFLOW_SHA256 = "85b90416f7a11339327777bccd634de00ca0de2cf334f5f0727edcb11ff9289a"
+# 2022-02-15
+_TENSORFLOW_GIT_COMMIT = "a3419acc751dfc19caf4d34a1594e1f76810ec58"
+_TENSORFLOW_SHA256 = "b95b2a83632d4055742ae1a2dcc96b45da6c12a339462dbc76c8bca505308e3a"
 http_archive(
     name = "org_tensorflow",
     urls = [

@@ -383,7 +384,6 @@ http_archive(
     ],
     patches = [
         "@//third_party:org_tensorflow_compatibility_fixes.diff",
-        "@//third_party:org_tensorflow_objc_cxx17.diff",
         # Diff is generated with a script, don't update it manually.
         "@//third_party:org_tensorflow_custom_ops.diff",
     ],
@@ -109,7 +109,7 @@ for app in ${apps}; do
       if [[ ${category} != "shoe" ]]; then
         bazel_flags_extended+=(--define ${category}=true)
       fi
-      bazel "${bazel_flags_extended[@]}"
+      bazelisk "${bazel_flags_extended[@]}"
       cp -f "${bin}" "${apk}"
     fi
     apks+=(${apk})
@@ -120,7 +120,7 @@ for app in ${apps}; do
     if [[ ${app_name} == "templatematchingcpu" ]]; then
       switch_to_opencv_4
     fi
-    bazel "${bazel_flags[@]}"
+    bazelisk "${bazel_flags[@]}"
     cp -f "${bin}" "${apk}"
     if [[ ${app_name} == "templatematchingcpu" ]]; then
       switch_to_opencv_3
@@ -83,7 +83,7 @@ for app in ${apps}; do
     bazel_flags=("${default_bazel_flags[@]}")
     bazel_flags+=(${target})
 
-    bazel "${bazel_flags[@]}"
+    bazelisk "${bazel_flags[@]}"
     cp -f "${bin_dir}/${app}/"*"_cpu" "${out_dir}"
   fi
   if [[ $build_only == false ]]; then
@@ -71,7 +71,7 @@ for app in ${apps}; do
       bazel_flags+=(--linkopt=-s)
     fi
 
-    bazel "${bazel_flags[@]}"
+    bazelisk "${bazel_flags[@]}"
     cp -f "${bin_dir}/${app}/"*".ipa" "${out_dir}"
   fi
 done
@@ -169,7 +169,7 @@ behavior depending on resource constraints.
 
 [`CalculatorBase`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator_base.h
 [`DefaultInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/default_input_stream_handler.h
-[`SyncSetInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/sync_set_input_stream_handler.h
-[`ImmediateInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/immediate_input_stream_handler.h
+[`SyncSetInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc
+[`ImmediateInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc
 [`CalculatorGraphConfig::max_queue_size`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator.proto
 [`FlowLimiterCalculator`]: https://github.com/google/mediapipe/tree/master/mediapipe/calculators/core/flow_limiter_calculator.cc
@@ -30,7 +30,7 @@ APIs (currently in alpha) that are now available in
 *   Install MediaPipe following these [instructions](./install.md).
 *   Setup Java Runtime.
 *   Setup Android SDK release 30.0.0 and above.
-*   Setup Android NDK version 18 and above.
+*   Setup Android NDK version between 18 and 21.
 
 MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see
 below for Android Studio setup). However, if you prefer using MediaPipe without
@@ -48,6 +48,16 @@ each project.
     bazel build -c opt --strip=ALWAYS \
         --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
         --fat_apk_cpu=arm64-v8a,armeabi-v7a \
+        --legacy_whole_archive=0 \
+        --features=-legacy_whole_archive \
+        --copt=-fvisibility=hidden \
+        --copt=-ffunction-sections \
+        --copt=-fdata-sections \
+        --copt=-fstack-protector \
+        --copt=-Oz \
+        --copt=-fomit-frame-pointer \
+        --copt=-DABSL_MIN_LOG_LEVEL=2 \
+        --linkopt=-Wl,--gc-sections,--strip-all \
         //path/to/the/aar/build/file:aar_name.aar
     ```
 
@@ -57,6 +67,16 @@ each project.
    bazel build -c opt --strip=ALWAYS \
        --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
        --fat_apk_cpu=arm64-v8a,armeabi-v7a \
+        --legacy_whole_archive=0 \
+        --features=-legacy_whole_archive \
+        --copt=-fvisibility=hidden \
+        --copt=-ffunction-sections \
+        --copt=-fdata-sections \
+        --copt=-fstack-protector \
+        --copt=-Oz \
+        --copt=-fomit-frame-pointer \
+        --copt=-DABSL_MIN_LOG_LEVEL=2 \
+        --linkopt=-Wl,--gc-sections,--strip-all \
        //mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mediapipe_face_detection.aar
 
    # It should print:
@@ -569,7 +569,7 @@ next section.
 
 Option 1. Follow
 [the official Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html)
-to install Bazel 4.2.1 or higher.
+to install Bazel 5.0.0 or higher.
 
 Option 2. Follow the official
 [Bazel documentation](https://docs.bazel.build/versions/master/install-bazelisk.html)
@@ -126,6 +126,7 @@ following steps:
       }
       return packet.Get<MyType>();
     });
   }
+}  // namespace mediapipe
 ```
 
@@ -136,8 +136,8 @@ run code search using
 
 ## Community
 
-*   [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome
-    MediaPipe related frameworks, libraries and software
+*   [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
+    curated list of awesome MediaPipe related frameworks, libraries and software
 *   [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
 *   [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
     community discussion around MediaPipe
@@ -26,7 +26,7 @@ MediaPipe Face Detection is an ultrafast face detection solution that comes with
 face detector tailored for mobile GPU inference. The detector's super-realtime
 performance enables it to be applied to any live viewfinder experience that
 requires an accurate facial region of interest as an input for other
-task-specific models, such as 3D facial keypoint or geometry estimation (e.g.,
+task-specific models, such as 3D facial keypoint estimation (e.g.,
 [MediaPipe Face Mesh](./face_mesh.md)), facial features or expression
 classification, and face region segmentation. BlazeFace uses a lightweight
 feature extraction network inspired by, but distinct from
@@ -20,34 +20,34 @@ nav_order: 2
 
 ## Overview
 
-MediaPipe Face Mesh is a face geometry solution that estimates 468 3D face
-landmarks in real-time even on mobile devices. It employs machine learning (ML)
-to infer the 3D surface geometry, requiring only a single camera input without
-the need for a dedicated depth sensor. Utilizing lightweight model architectures
-together with GPU acceleration throughout the pipeline, the solution delivers
-real-time performance critical for live experiences.
+MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
+real-time even on mobile devices. It employs machine learning (ML) to infer the
+3D facial surface, requiring only a single camera input without the need for a
+dedicated depth sensor. Utilizing lightweight model architectures together with
+GPU acceleration throughout the pipeline, the solution delivers real-time
+performance critical for live experiences.
 
-Additionally, the solution is bundled with the Face Geometry module that bridges
-the gap between the face landmark estimation and useful real-time augmented
-reality (AR) applications. It establishes a metric 3D space and uses the face
-landmark screen positions to estimate face geometry within that space. The face
-geometry data consists of common 3D geometry primitives, including a face pose
-transformation matrix and a triangular face mesh. Under the hood, a lightweight
-statistical analysis method called
+Additionally, the solution is bundled with the Face Transform module that
+bridges the gap between the face landmark estimation and useful real-time
+augmented reality (AR) applications. It establishes a metric 3D space and uses
+the face landmark screen positions to estimate a face transform within that
+space. The face transform data consists of common 3D primitives, including a
+face pose transformation matrix and a triangular face mesh. Under the hood, a
+lightweight statistical analysis method called
 [Procrustes Analysis](https://en.wikipedia.org/wiki/Procrustes_analysis) is
 employed to drive a robust, performant and portable logic. The analysis runs on
 CPU and has a minimal speed/memory footprint on top of the ML model inference.
 
 ![face_mesh_ar_effects.gif](../images/face_mesh_ar_effects.gif) |
 :-------------------------------------------------------------: |
-*Fig 1. AR effects utilizing facial surface geometry.* |
+*Fig 1. AR effects utilizing the 3D facial surface.* |
 
 ## ML Pipeline
 
 Our ML pipeline consists of two real-time deep neural network models that work
 together: A detector that operates on the full image and computes face locations
 and a 3D face landmark model that operates on those locations and predicts the
-approximate surface geometry via regression. Having the face accurately cropped
+approximate 3D surface via regression. Having the face accurately cropped
 drastically reduces the need for common data augmentations like affine
 transformations consisting of rotations, translation and scale changes. Instead
 it allows the network to dedicate most of its capacity towards coordinate
@@ -55,8 +55,8 @@ prediction accuracy. In addition, in our pipeline the crops can also be
 generated based on the face landmarks identified in the previous frame, and only
 when the landmark model could no longer identify face presence is the face
 detector invoked to relocalize the face. This strategy is similar to that
-employed in our [MediaPipe Hands](./hands.md) solution, which uses a palm detector
-together with a hand landmark model.
+employed in our [MediaPipe Hands](./hands.md) solution, which uses a palm
+detector together with a hand landmark model.
 
 The pipeline is implemented as a MediaPipe
 [graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt)
@@ -128,7 +128,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
 :---------------------------------------------------------------------------: |
 *Fig 3. Attention Mesh: Overview of model architecture.* |
 
-## Face Geometry Module
+## Face Transform Module
 
 The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
 detection in the screen coordinate space: the X- and Y- coordinates are
@@ -140,7 +140,7 @@ enable the full spectrum of augmented reality (AR) features like aligning a
 virtual 3D object with a detected face.
 
 The
-[Face Geometry module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
+[Face Transform module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
 moves away from the screen coordinate space towards a metric 3D space and
 provides necessary primitives to handle a detected face as a regular 3D object.
 By design, you'll be able to use a perspective camera to project the final 3D
@@ -151,7 +151,7 @@ landmark positions are not changed.
 
 #### Metric 3D Space
 
-The **Metric 3D space** established within the Face Geometry module is a
+The **Metric 3D space** established within the Face Transform module is a
 right-handed orthonormal metric 3D coordinate space. Within the space, there is
 a **virtual perspective camera** located at the space origin and pointed in the
 negative direction of the Z-axis. In the current pipeline, it is assumed that
@@ -184,11 +184,11 @@ functions:
 
 ### Components
 
-#### Geometry Pipeline
+#### Transform Pipeline
 
-The **Geometry Pipeline** is a key component, which is responsible for
-estimating face geometry objects within the Metric 3D space. On each frame, the
-following steps are executed in the given order:
+The **Transform Pipeline** is a key component, which is responsible for
+estimating the face transform objects within the Metric 3D space. On each frame,
+the following steps are executed in the given order:
 
 -   Face landmark screen coordinates are converted into the Metric 3D space
     coordinates;
|
|||
positions (XYZ), while both the vertex texture coordinates (UV) and the
|
||||
triangular topology are inherited from the canonical face model.
|
||||
|
||||
The geometry pipeline is implemented as a MediaPipe
|
||||
The transform pipeline is implemented as a MediaPipe
|
||||
[calculator](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc).
|
||||
For your convenience, the face geometry pipeline calculator is bundled together
|
||||
with corresponding metadata into a unified MediaPipe
|
||||
For your convenience, this calculator is bundled together with corresponding
|
||||
metadata into a unified MediaPipe
|
||||
[subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt).
|
||||
The face geometry format is defined as a Protocol Buffer
|
||||
The face transform format is defined as a Protocol Buffer
|
||||
[message](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto).
|
||||
|
||||
#### Effect Renderer
|
||||
|
@ -227,7 +227,7 @@ The effect renderer is implemented as a MediaPipe
|
|||
|
||||
| ![face_geometry_renderer.gif](../images/face_geometry_renderer.gif) |
|
||||
| :---------------------------------------------------------------------: |
|
||||
| *Fig 5. An example of face effects rendered by the Face Geometry Effect Renderer.* |
|
||||
| *Fig 5. An example of face effects rendered by the Face Transform Effect Renderer.* |
|
||||
|
||||
## Solution APIs
|
||||
|
||||
|
|
|
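As background on the Procrustes Analysis step the renamed Face Transform docs reference above (a standard formulation, not text from this commit; the module's exact weighting may differ): the transform pipeline aligns the detected landmark cloud to the canonical face model by solving a weighted similarity-transform least-squares problem,

```
\min_{s>0,\; R\in SO(3),\; \mathbf{t}} \;\sum_{i=1}^{468} w_i \,\bigl\| s\,R\,\mathbf{p}_i + \mathbf{t} - \mathbf{c}_i \bigr\|^2
```

where the \(\mathbf{p}_i\) are landmark positions, the \(\mathbf{c}_i\) canonical-model vertices, and the \(w_i\) per-landmark weights; the optimal rotation follows from the SVD of the weighted cross-covariance matrix, with a sign correction on the smallest singular vector so that \(\det R = +1\).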
@@ -116,7 +116,7 @@ on how to build MediaPipe examples.
 
 Note: The following runs TensorFlow inference on CPU. If you would like to
 run inference on GPU (Linux only), please follow
-[TensorFlow CUDA Support and Setup on Linux Desktop](gpu.md#tensorflow-cuda-support-and-setup-on-linux-desktop)
+[TensorFlow CUDA Support and Setup on Linux Desktop](../getting_started/gpu_support.md#tensorflow-cuda-support-and-setup-on-linux-desktop)
 instead.
 
 To build the TensorFlow CPU inference example on desktop, run:
@@ -384,7 +384,7 @@ Supported configuration options:
   <meta charset="utf-8">
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
-  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils_3d/control_utils_3d.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/objectron/objectron.js" crossorigin="anonymous"></script>
 </head>
@@ -359,7 +359,7 @@ Supported configuration options:
   <meta charset="utf-8">
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
-  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils_3d/control_utils_3d.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
   <script src="https://cdn.jsdelivr.net/npm/@mediapipe/pose/pose.js" crossorigin="anonymous"></script>
 </head>
@@ -117,6 +117,7 @@ mediapipe_proto_library(
         "//mediapipe/framework:calculator_options_proto",
         "//mediapipe/framework:calculator_proto",
         "//mediapipe/framework/formats:classification_proto",
+        "//mediapipe/framework/formats:landmark_proto",
     ],
 )
 
@@ -309,8 +310,8 @@ cc_library(
 )
 
 cc_library(
-    name = "concatenate_normalized_landmark_list_calculator",
-    srcs = ["concatenate_normalized_landmark_list_calculator.cc"],
+    name = "concatenate_proto_list_calculator",
+    srcs = ["concatenate_proto_list_calculator.cc"],
     visibility = ["//visibility:public"],
     deps = [
         ":concatenate_vector_calculator_cc_proto",
@@ -324,10 +325,10 @@ cc_library(
 )
 
 cc_test(
-    name = "concatenate_normalized_landmark_list_calculator_test",
-    srcs = ["concatenate_normalized_landmark_list_calculator_test.cc"],
+    name = "concatenate_proto_list_calculator_test",
+    srcs = ["concatenate_proto_list_calculator_test.cc"],
     deps = [
-        ":concatenate_normalized_landmark_list_calculator",
+        ":concatenate_proto_list_calculator",
         ":concatenate_vector_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework:calculator_runner",
@@ -964,8 +965,8 @@ cc_test(
 )
 
 cc_library(
-    name = "split_landmarks_calculator",
-    srcs = ["split_landmarks_calculator.cc"],
+    name = "split_proto_list_calculator",
+    srcs = ["split_proto_list_calculator.cc"],
     visibility = ["//visibility:public"],
     deps = [
         ":split_vector_calculator_cc_proto",
@@ -979,10 +980,10 @@ cc_library(
 )
 
 cc_test(
-    name = "split_landmarks_calculator_test",
-    srcs = ["split_landmarks_calculator_test.cc"],
+    name = "split_proto_list_calculator_test",
+    srcs = ["split_proto_list_calculator_test.cc"],
     deps = [
-        ":split_landmarks_calculator",
+        ":split_proto_list_calculator",
         ":split_vector_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework:calculator_runner",
@@ -1195,6 +1196,7 @@ cc_library(
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework:collection_item_id",
         "//mediapipe/framework/formats:classification_cc_proto",
+        "//mediapipe/framework/formats:landmark_cc_proto",
         "//mediapipe/framework/port:integral_types",
         "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
mediapipe/calculators/core/concatenate_normalized_landmark_list_calculator.cc (deleted)

@@ -1,79 +0,0 @@
-// Copyright 2019 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_  // NOLINT
-#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_  // NOLINT
-
-#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
-#include "mediapipe/framework/api2/node.h"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/formats/landmark.pb.h"
-#include "mediapipe/framework/port/canonical_errors.h"
-#include "mediapipe/framework/port/ret_check.h"
-#include "mediapipe/framework/port/status.h"
-
-namespace mediapipe {
-namespace api2 {
-
-// Concatenates several NormalizedLandmarkList protos following stream index
-// order. This class assumes that every input stream contains a
-// NormalizedLandmarkList proto object.
-class ConcatenateNormalizedLandmarkListCalculator : public Node {
- public:
-  static constexpr Input<NormalizedLandmarkList>::Multiple kIn{""};
-  static constexpr Output<NormalizedLandmarkList> kOut{""};
-
-  MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
-
-  static absl::Status UpdateContract(CalculatorContract* cc) {
-    RET_CHECK_GE(kIn(cc).Count(), 1);
-    return absl::OkStatus();
-  }
-
-  absl::Status Open(CalculatorContext* cc) override {
-    only_emit_if_all_present_ =
-        cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
-            .only_emit_if_all_present();
-    return absl::OkStatus();
-  }
-
-  absl::Status Process(CalculatorContext* cc) override {
-    if (only_emit_if_all_present_) {
-      for (const auto& input : kIn(cc)) {
-        if (input.IsEmpty()) return absl::OkStatus();
-      }
-    }
-
-    NormalizedLandmarkList output;
-    for (const auto& input : kIn(cc)) {
-      if (input.IsEmpty()) continue;
-      const NormalizedLandmarkList& list = *input;
-      for (int j = 0; j < list.landmark_size(); ++j) {
-        *output.add_landmark() = list.landmark(j);
-      }
-    }
-    kOut(cc).Send(std::move(output));
-    return absl::OkStatus();
-  }
-
- private:
-  bool only_emit_if_all_present_;
-};
-MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
-
-}  // namespace api2
-}  // namespace mediapipe
-
-// NOLINTNEXTLINE
-#endif  // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_
mediapipe/calculators/core/concatenate_proto_list_calculator.cc (new file, 118 lines)

@@ -0,0 +1,118 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_  // NOLINT
+#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_  // NOLINT
+
+#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
+#include "mediapipe/framework/api2/node.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/port/canonical_errors.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+
+namespace mediapipe {
+namespace api2 {
+
+// Concatenate several input packets of ListType with a repeated field of
+// ItemType into a single output packet of ListType following stream index
+// order.
+template <typename ItemType, typename ListType>
+class ConcatenateListsCalculator : public Node {
+ public:
+  static constexpr typename Input<ListType>::Multiple kIn{""};
+  static constexpr Output<ListType> kOut{""};
+
+  MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
+
+  static absl::Status UpdateContract(CalculatorContract* cc) {
+    RET_CHECK_GE(kIn(cc).Count(), 1);
+    return absl::OkStatus();
+  }
+
+  absl::Status Open(CalculatorContext* cc) override {
+    only_emit_if_all_present_ =
+        cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
+            .only_emit_if_all_present();
+    return absl::OkStatus();
+  }
+
+  absl::Status Process(CalculatorContext* cc) override {
+    if (only_emit_if_all_present_) {
+      for (const auto& input : kIn(cc)) {
+        if (input.IsEmpty()) return absl::OkStatus();
+      }
+    }
+
+    ListType output;
+    for (const auto& input : kIn(cc)) {
+      if (input.IsEmpty()) continue;
+      const ListType& list = *input;
+      for (int j = 0; j < ListSize(list); ++j) {
+        *AddItem(output) = GetItem(list, j);
+      }
+    }
+    kOut(cc).Send(std::move(output));
+    return absl::OkStatus();
+  }
+
+ protected:
+  virtual int ListSize(const ListType& list) const = 0;
+  virtual const ItemType GetItem(const ListType& list, int idx) const = 0;
+  virtual ItemType* AddItem(ListType& list) const = 0;
+
+ private:
+  bool only_emit_if_all_present_;
+};
+
+// TODO: Move calculators to separate *.cc files
+
+class ConcatenateNormalizedLandmarkListCalculator
+    : public ConcatenateListsCalculator<NormalizedLandmark,
+                                        NormalizedLandmarkList> {
+ protected:
+  int ListSize(const NormalizedLandmarkList& list) const override {
+    return list.landmark_size();
+  }
+  const NormalizedLandmark GetItem(const NormalizedLandmarkList& list,
+                                   int idx) const override {
+    return list.landmark(idx);
+  }
+  NormalizedLandmark* AddItem(NormalizedLandmarkList& list) const override {
+    return list.add_landmark();
+  }
+};
+MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
+
+class ConcatenateLandmarkListCalculator
+    : public ConcatenateListsCalculator<Landmark, LandmarkList> {
+ protected:
+  int ListSize(const LandmarkList& list) const override {
+    return list.landmark_size();
+  }
+  const Landmark GetItem(const LandmarkList& list, int idx) const override {
+    return list.landmark(idx);
+  }
+  Landmark* AddItem(LandmarkList& list) const override {
+    return list.add_landmark();
+  }
+};
+MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkListCalculator);
+
+}  // namespace api2
+}  // namespace mediapipe
+
+// NOLINTNEXTLINE
+#endif  // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_
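The template above makes new repeated-proto list types cheap to support. As an illustration only (a hypothetical subclass, not part of this commit), a ClassificationList variant would mirror the two registered calculators:

```
// Hypothetical subclass (illustrative, not in this commit): concatenates
// ClassificationList packets via the ConcatenateListsCalculator template.
#include "mediapipe/framework/formats/classification.pb.h"

class ConcatenateClassificationListCalculator
    : public ConcatenateListsCalculator<Classification, ClassificationList> {
 protected:
  int ListSize(const ClassificationList& list) const override {
    return list.classification_size();
  }
  const Classification GetItem(const ClassificationList& list,
                               int idx) const override {
    return list.classification(idx);
  }
  Classification* AddItem(ClassificationList& list) const override {
    return list.add_classification();
  }
};
MEDIAPIPE_REGISTER_NODE(ConcatenateClassificationListCalculator);
```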
@@ -18,6 +18,7 @@
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/collection_item_id.h"
 #include "mediapipe/framework/formats/classification.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/port/canonical_errors.h"
 #include "mediapipe/framework/port/integral_types.h"
 #include "mediapipe/framework/port/ret_check.h"

@@ -79,6 +80,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
       packet.Set<uint64>();
     } else if (packet_options.has_classification_list_value()) {
       packet.Set<ClassificationList>();
+    } else if (packet_options.has_landmark_list_value()) {
+      packet.Set<LandmarkList>();
     } else {
       return absl::InvalidArgumentError(
           "None of supported values were specified in options.");

@@ -108,6 +111,9 @@ class ConstantSidePacketCalculator : public CalculatorBase {
     } else if (packet_options.has_classification_list_value()) {
       packet.Set(MakePacket<ClassificationList>(
           packet_options.classification_list_value()));
+    } else if (packet_options.has_landmark_list_value()) {
+      packet.Set(
+          MakePacket<LandmarkList>(packet_options.landmark_list_value()));
     } else {
       return absl::InvalidArgumentError(
           "None of supported values were specified in options.");
@@ -18,6 +18,7 @@ package mediapipe;
 
 import "mediapipe/framework/calculator.proto";
 import "mediapipe/framework/formats/classification.proto";
+import "mediapipe/framework/formats/landmark.proto";
 
 option objc_class_prefix = "MediaPipe";
 

@@ -34,6 +35,7 @@ message ConstantSidePacketCalculatorOptions {
     string string_value = 4;
     uint64 uint64_value = 5;
     ClassificationList classification_list_value = 6;
+    LandmarkList landmark_list_value = 7;
   }
 }
 
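A hedged usage sketch for the new landmark_list_value option (the graph and side-packet calls are the standard MediaPipe C++ APIs; the packet contents and side-packet name are illustrative):

```
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

absl::Status EmitConstantLandmarks() {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
        node {
          calculator: "ConstantSidePacketCalculator"
          output_side_packet: "PACKET:landmarks"
          options {
            [mediapipe.ConstantSidePacketCalculatorOptions.ext] {
              packet { landmark_list_value { landmark { x: 0.5 y: 0.5 z: 0 } } }
            }
          }
        }
      )pb");
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  MP_RETURN_IF_ERROR(graph.StartRun({}));
  MP_RETURN_IF_ERROR(graph.WaitUntilDone());
  // The side packet now holds the LandmarkList given in the options.
  ASSIGN_OR_RETURN(mediapipe::Packet packet,
                   graph.GetOutputSidePacket("landmarks"));
  const auto& landmarks = packet.Get<mediapipe::LandmarkList>();
  (void)landmarks;
  return absl::OkStatus();
}
```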
@@ -29,6 +29,11 @@ namespace api2 {
 // This calculator periodically copies the GraphProfile from
 // mediapipe::GraphProfiler::CaptureProfile to the "PROFILE" output stream.
 //
+// Similarly to the log files saved by GraphProfiler::WriteProfile when trace
+// logging is enabled, the first captured profile contains the full
+// canonicalized graph config and, if tracing is enabled, calculator names in
+// graph traces. Subsequent profiles omit this information.
+//
 // Example config:
 // node {
 //   calculator: "GraphProfileCalculator"

@@ -50,11 +55,14 @@ class GraphProfileCalculator : public Node {
   absl::Status Process(CalculatorContext* cc) final {
     auto options = cc->Options<::mediapipe::GraphProfileCalculatorOptions>();
 
-    if (prev_profile_ts_ == Timestamp::Unset() ||
+    bool first_profile = prev_profile_ts_ == Timestamp::Unset();
+    if (first_profile ||
         cc->InputTimestamp() - prev_profile_ts_ >= options.profile_interval()) {
       prev_profile_ts_ = cc->InputTimestamp();
       GraphProfile result;
-      MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(&result));
+      MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(
+          &result, first_profile ? PopulateGraphConfig::kFull
+                                 : PopulateGraphConfig::kNo));
       kProfileOut(cc).Send(result);
     }
     return absl::OkStatus();

@@ -202,6 +202,8 @@ TEST_F(GraphProfileCalculatorTest, GraphProfile) {
       }
     })pb");
 
+  ASSERT_EQ(output_packets.size(), 2);
+  EXPECT_TRUE(output_packets[0].Get<GraphProfile>().has_config());
   EXPECT_THAT(output_packets[1].Get<GraphProfile>(),
               mediapipe::EqualsProto(expected_profile));
 }
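On the consumer side, a hedged sketch (standard CalculatorGraph observer API; the stream name is illustrative) of how the first-profile behavior shows up:

```
// Only the first PROFILE packet carries the full canonicalized graph config.
MP_RETURN_IF_ERROR(graph.ObserveOutputStream(
    "profile", [](const mediapipe::Packet& packet) {
      const auto& profile = packet.Get<mediapipe::GraphProfile>();
      if (profile.has_config()) {
        // First capture: full config (and, with tracing, calculator names).
      }
      return absl::OkStatus();
    }));
```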
@@ -23,8 +23,8 @@
 #include "mediapipe/framework/port/canonical_errors.h"
 #include "mediapipe/framework/port/status.h"
 
-// Quantizes a vector of floats to a std::string so that each float becomes a
-// byte in the [0, 255] range. Any value above max_quantized_value or below
+// Quantizes a vector of floats to a string so that each float becomes a byte
+// in the [0, 255] range. Any value above max_quantized_value or below
 // min_quantized_value will be saturated to '/xFF' or '/0'.
 //
 // Example config:
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_  // NOLINT
-#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_  // NOLINT
+#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_  // NOLINT
+#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_  // NOLINT
 
 #include "mediapipe/calculators/core/split_vector_calculator.pb.h"
 #include "mediapipe/framework/calculator_framework.h"

@@ -24,30 +24,30 @@
 
 namespace mediapipe {
 
-// Splits an input packet with LandmarkListType into
-// multiple LandmarkListType output packets using the [begin, end) ranges
+// Splits an input packet of ListType with a repeated field of ItemType
+// into multiple ListType output packets using the [begin, end) ranges
 // specified in SplitVectorCalculatorOptions. If the option "element_only" is
 // set to true, all ranges should be of size 1 and all outputs will be elements
-// of type LandmarkType. If "element_only" is false, ranges can be
-// non-zero in size and all outputs will be of type LandmarkListType.
+// of type ItemType. If "element_only" is false, ranges can be
+// non-zero in size and all outputs will be of type ListType.
 // If the option "combine_outputs" is set to true, only one output stream can be
 // specified and all ranges of elements will be combined into one
-// LandmarkListType.
-template <typename LandmarkType, typename LandmarkListType>
-class SplitLandmarksCalculator : public CalculatorBase {
+// ListType.
+template <typename ItemType, typename ListType>
+class SplitListsCalculator : public CalculatorBase {
  public:
   static absl::Status GetContract(CalculatorContract* cc) {
     RET_CHECK(cc->Inputs().NumEntries() == 1);
     RET_CHECK(cc->Outputs().NumEntries() != 0);
 
-    cc->Inputs().Index(0).Set<LandmarkListType>();
+    cc->Inputs().Index(0).Set<ListType>();
 
     const auto& options =
         cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
 
     if (options.combine_outputs()) {
       RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
-      cc->Outputs().Index(0).Set<LandmarkListType>();
+      cc->Outputs().Index(0).Set<ListType>();
       for (int i = 0; i < options.ranges_size() - 1; ++i) {
         for (int j = i + 1; j < options.ranges_size(); ++j) {
           const auto& range_0 = options.ranges(i);

@@ -82,9 +82,9 @@ class SplitListsCalculator : public CalculatorBase {
           return absl::InvalidArgumentError(
               "Since element_only is true, all ranges should be of size 1.");
         }
-        cc->Outputs().Index(i).Set<LandmarkType>();
+        cc->Outputs().Index(i).Set<ItemType>();
       } else {
-        cc->Outputs().Index(i).Set<LandmarkListType>();
+        cc->Outputs().Index(i).Set<ListType>();
       }
     }
   }

@@ -111,39 +111,38 @@ class SplitListsCalculator : public CalculatorBase {
   }
 
   absl::Status Process(CalculatorContext* cc) override {
-    const LandmarkListType& input =
-        cc->Inputs().Index(0).Get<LandmarkListType>();
-    RET_CHECK_GE(input.landmark_size(), max_range_end_)
-        << "Max range end " << max_range_end_ << " exceeds landmarks size "
-        << input.landmark_size();
+    const ListType& input = cc->Inputs().Index(0).Get<ListType>();
+    RET_CHECK_GE(ListSize(input), max_range_end_)
+        << "Max range end " << max_range_end_ << " exceeds list size "
+        << ListSize(input);
 
     if (combine_outputs_) {
-      LandmarkListType output;
+      ListType output;
       for (int i = 0; i < ranges_.size(); ++i) {
         for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
-          const LandmarkType& input_landmark = input.landmark(j);
-          *output.add_landmark() = input_landmark;
+          const ItemType& input_item = GetItem(input, j);
+          *AddItem(output) = input_item;
         }
       }
-      RET_CHECK_EQ(output.landmark_size(), total_elements_);
+      RET_CHECK_EQ(ListSize(output), total_elements_);
       cc->Outputs().Index(0).AddPacket(
-          MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
+          MakePacket<ListType>(output).At(cc->InputTimestamp()));
     } else {
       if (element_only_) {
         for (int i = 0; i < ranges_.size(); ++i) {
           cc->Outputs().Index(i).AddPacket(
-              MakePacket<LandmarkType>(input.landmark(ranges_[i].first))
+              MakePacket<ItemType>(GetItem(input, ranges_[i].first))
                   .At(cc->InputTimestamp()));
         }
       } else {
        for (int i = 0; i < ranges_.size(); ++i) {
-          LandmarkListType output;
+          ListType output;
           for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
-            const LandmarkType& input_landmark = input.landmark(j);
-            *output.add_landmark() = input_landmark;
+            const ItemType& input_item = GetItem(input, j);
+            *AddItem(output) = input_item;
           }
           cc->Outputs().Index(i).AddPacket(
-              MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
+              MakePacket<ListType>(output).At(cc->InputTimestamp()));
         }
       }
     }

@@ -151,6 +150,11 @@ class SplitListsCalculator : public CalculatorBase {
     return absl::OkStatus();
   }
 
+ protected:
+  virtual int ListSize(const ListType& list) const = 0;
+  virtual const ItemType GetItem(const ListType& list, int idx) const = 0;
+  virtual ItemType* AddItem(ListType& list) const = 0;
+
  private:
   std::vector<std::pair<int32, int32>> ranges_;
   int32 max_range_end_ = -1;

@@ -159,15 +163,40 @@ class SplitListsCalculator : public CalculatorBase {
   bool combine_outputs_ = false;
 };
 
-typedef SplitLandmarksCalculator<NormalizedLandmark, NormalizedLandmarkList>
-    SplitNormalizedLandmarkListCalculator;
+// TODO: Move calculators to separate *.cc files
+
+class SplitNormalizedLandmarkListCalculator
+    : public SplitListsCalculator<NormalizedLandmark, NormalizedLandmarkList> {
+ protected:
+  int ListSize(const NormalizedLandmarkList& list) const override {
+    return list.landmark_size();
+  }
+  const NormalizedLandmark GetItem(const NormalizedLandmarkList& list,
+                                   int idx) const override {
+    return list.landmark(idx);
+  }
+  NormalizedLandmark* AddItem(NormalizedLandmarkList& list) const override {
+    return list.add_landmark();
+  }
+};
 REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);
 
-typedef SplitLandmarksCalculator<Landmark, LandmarkList>
-    SplitLandmarkListCalculator;
+class SplitLandmarkListCalculator
+    : public SplitListsCalculator<Landmark, LandmarkList> {
+ protected:
+  int ListSize(const LandmarkList& list) const override {
+    return list.landmark_size();
+  }
+  const Landmark GetItem(const LandmarkList& list, int idx) const override {
+    return list.landmark(idx);
+  }
+  Landmark* AddItem(LandmarkList& list) const override {
+    return list.add_landmark();
+  }
+};
 REGISTER_CALCULATOR(SplitLandmarkListCalculator);
 
 }  // namespace mediapipe
 
 // NOLINTNEXTLINE
-#endif  // MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_
+#endif  // MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_
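A hedged configuration sketch for the registered split calculators (stream names and ranges are illustrative; the options proto is the existing SplitVectorCalculatorOptions):

```
auto node = mediapipe::ParseTextProtoOrDie<
    mediapipe::CalculatorGraphConfig::Node>(R"pb(
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "face_landmarks"
  output_stream: "range_0_landmarks"
  output_stream: "range_1_landmarks"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges { begin: 0 end: 40 }
      ranges { begin: 40 end: 80 }
    }
  }
)pb");
```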
@@ -24,7 +24,7 @@
 
 namespace mediapipe {
 
-// Calculator that converts a std::string into an integer type, or fails if the
+// Calculator that converts a string into an integer type, or fails if the
 // conversion is not possible.
 //
 // Example config:

@@ -47,7 +47,7 @@ class StringToIntCalculatorTemplate : public CalculatorBase {
     if (!absl::SimpleAtoi(cc->InputSidePackets().Index(0).Get<std::string>(),
                           &number)) {
       return absl::InvalidArgumentError(
-          "The std::string could not be parsed as an integer.");
+          "The string could not be parsed as an integer.");
     }
     cc->OutputSidePackets().Index(0).Set(MakePacket<IntType>(number));
     return absl::OkStatus();
@@ -239,10 +239,13 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":image_transformation_calculator_cc_proto",
+        "//mediapipe/framework:packet",
+        "//mediapipe/framework:timestamp",
         "//mediapipe/gpu:scale_mode_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework/formats:image_frame",
         "//mediapipe/framework/formats:image_frame_opencv",
+        "//mediapipe/framework/formats:video_stream_header",
         "//mediapipe/framework/port:opencv_core",
         "//mediapipe/framework/port:opencv_imgproc",
         "//mediapipe/framework/port:ret_check",
@@ -105,7 +105,7 @@ absl::StatusOr<ImageFileProperties> GetImageFileProperites(
 }  // namespace
 
 // Calculator to extract EXIF information from an image file. The input is
-// a std::string containing raw byte data from a file, and the output is an
+// a string containing raw byte data from a file, and the output is an
 // ImageFileProperties proto object with the relevant fields filled in.
 // The calculator accepts the input as a stream or a side packet, and can output
 // the result as a stream or a side packet. The calculator checks that if an
@@ -16,10 +16,13 @@
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/formats/image_frame.h"
 #include "mediapipe/framework/formats/image_frame_opencv.h"
+#include "mediapipe/framework/formats/video_stream_header.h"
+#include "mediapipe/framework/packet.h"
 #include "mediapipe/framework/port/opencv_core_inc.h"
 #include "mediapipe/framework/port/opencv_imgproc_inc.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/timestamp.h"
 #include "mediapipe/gpu/scale_mode.pb.h"
 
 #if !MEDIAPIPE_DISABLE_GPU

@@ -52,6 +55,7 @@ namespace mediapipe {
 namespace {
 constexpr char kImageFrameTag[] = "IMAGE";
 constexpr char kGpuBufferTag[] = "IMAGE_GPU";
+constexpr char kVideoPrestreamTag[] = "VIDEO_PRESTREAM";
 
 int RotationModeToDegrees(mediapipe::RotationMode_Mode rotation) {
   switch (rotation) {

@@ -122,6 +126,12 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
 //   provided, it overrides the FLIP_VERTICALLY input side packet and/or
 //   corresponding field in the calculator options.
 //
+//   VIDEO_PRESTREAM (optional): VideoHeader for the input ImageFrames, if
+//   rotating or scaling the frames, the header width and height will be updated
+//   appropriately. Note the header is updated only based on dimensions and
+//   rotations specified as side packets or options, input_stream
+//   transformations will not update the header.
+//
 // Output:
 //   One of the following tags:
 //   IMAGE - ImageFrame representing the output image.

@@ -242,6 +252,21 @@ absl::Status ImageTransformationCalculator::GetContract(
     cc->Inputs().Tag("FLIP_VERTICALLY").Set<bool>();
   }
 
+  RET_CHECK(cc->Inputs().HasTag(kVideoPrestreamTag) ==
+            cc->Outputs().HasTag(kVideoPrestreamTag))
+      << "If VIDEO_PRESTREAM is provided, it must be provided both as an "
+         "inputs and output stream.";
+  if (cc->Inputs().HasTag(kVideoPrestreamTag)) {
+    RET_CHECK(!(cc->Inputs().HasTag("OUTPUT_DIMENSIONS") ||
+                cc->Inputs().HasTag("ROTATION_DEGREES")))
+        << "If specifying VIDEO_PRESTREAM, the transformations that affect the "
+           "dimensions of the frames (OUTPUT_DIMENSIONS and ROTATION_DEGREES) "
+           "need to be constant for every frame, meaning they can only be "
+           "provided in the calculator options or side packets.";
+    cc->Inputs().Tag(kVideoPrestreamTag).Set<mediapipe::VideoHeader>();
+    cc->Outputs().Tag(kVideoPrestreamTag).Set<mediapipe::VideoHeader>();
+  }
+
   if (cc->InputSidePackets().HasTag("OUTPUT_DIMENSIONS")) {
     cc->InputSidePackets().Tag("OUTPUT_DIMENSIONS").Set<DimensionsPacketType>();
   }

@@ -326,6 +351,24 @@ absl::Status ImageTransformationCalculator::Open(CalculatorContext* cc) {
 }
 
 absl::Status ImageTransformationCalculator::Process(CalculatorContext* cc) {
+  // First update the video header if it is given, based on the rotation and
+  // dimensions specified as side packets or options. This will only be done
+  // once, so streaming transformation changes will not be reflected in
+  // the header.
+  if (cc->Inputs().HasTag(kVideoPrestreamTag) &&
+      !cc->Inputs().Tag(kVideoPrestreamTag).IsEmpty() &&
+      cc->Outputs().HasTag(kVideoPrestreamTag)) {
+    mediapipe::VideoHeader header =
+        cc->Inputs().Tag(kVideoPrestreamTag).Get<mediapipe::VideoHeader>();
+    // Update the header's width and height if needed.
+    ComputeOutputDimensions(header.width, header.height, &header.width,
+                            &header.height);
+    cc->Outputs()
+        .Tag(kVideoPrestreamTag)
+        .AddPacket(mediapipe::MakePacket<mediapipe::VideoHeader>(header).At(
+            mediapipe::Timestamp::PreStream()));
+  }
+
   // Override values if specified so.
   if (cc->Inputs().HasTag("ROTATION_DEGREES") &&
       !cc->Inputs().Tag("ROTATION_DEGREES").IsEmpty()) {
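A hedged wiring sketch for the new optional VIDEO_PRESTREAM pair (stream names and the rotation value are illustrative; the options extension is the existing ImageTransformationCalculatorOptions):

```
auto node = mediapipe::ParseTextProtoOrDie<
    mediapipe::CalculatorGraphConfig::Node>(R"pb(
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "VIDEO_PRESTREAM:input_video_header"
  output_stream: "IMAGE:output_video"
  output_stream: "VIDEO_PRESTREAM:output_video_header"
  options {
    [mediapipe.ImageTransformationCalculatorOptions.ext] {
      rotation_mode: ROTATION_90
    }
  }
)pb");
// The VideoHeader re-emitted at Timestamp::PreStream() carries width/height
// already adjusted for the configured rotation/scaling, so a downstream
// encoder can be set up before the first frame arrives.
```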
@@ -22,9 +22,9 @@
 
 namespace mediapipe {
 
-// Takes in an encoded image std::string, decodes it by OpenCV, and converts to
-// an ImageFrame. Note that this calculator only supports grayscale and RGB
-// images for now.
+// Takes in an encoded image string, decodes it by OpenCV, and converts to an
+// ImageFrame. Note that this calculator only supports grayscale and RGB images
+// for now.
 //
 // Example config:
 // node {
@@ -20,8 +20,8 @@
 
 namespace mediapipe {
 
-// Takes in a std::string, draws the text std::string by cv::putText(), and
-// outputs an ImageFrame.
+// Takes in a string, draws the text string by cv::putText(), and outputs an
+// ImageFrame.
 //
 // Example config:
 // node {
@@ -553,7 +553,6 @@ absl::Status ScaleImageCalculator::Process(CalculatorContext* cc) {
     }
   }
 
-  cc->GetCounter("Inputs")->Increment();
   const ImageFrame* image_frame;
   ImageFrame converted_image_frame;
   if (input_format_ == ImageFormat::YCBCR420P) {
@@ -183,22 +183,22 @@ absl::Status SegmentationSmoothingCalculator::Close(CalculatorContext* cc) {
 absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
   // Setup source images.
   const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
-  const cv::Mat current_mat = mediapipe::formats::MatView(&current_frame);
-  RET_CHECK_EQ(current_mat.type(), CV_32FC1)
+  auto current_mat = mediapipe::formats::MatView(&current_frame);
+  RET_CHECK_EQ(current_mat->type(), CV_32FC1)
       << "Only 1-channel float input image is supported.";
 
   const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
-  const cv::Mat previous_mat = mediapipe::formats::MatView(&previous_frame);
-  RET_CHECK_EQ(previous_mat.type(), current_mat.type())
-      << "Warning: mixing input format types: " << previous_mat.type()
-      << " != " << previous_mat.type();
+  auto previous_mat = mediapipe::formats::MatView(&previous_frame);
+  RET_CHECK_EQ(previous_mat->type(), current_mat->type())
+      << "Warning: mixing input format types: " << previous_mat->type()
+      << " != " << previous_mat->type();
 
-  RET_CHECK_EQ(current_mat.rows, previous_mat.rows);
-  RET_CHECK_EQ(current_mat.cols, previous_mat.cols);
+  RET_CHECK_EQ(current_mat->rows, previous_mat->rows);
+  RET_CHECK_EQ(current_mat->cols, previous_mat->cols);
 
   // Setup destination image.
   auto output_frame = std::make_shared<ImageFrame>(
-      current_frame.image_format(), current_mat.cols, current_mat.rows);
+      current_frame.image_format(), current_mat->cols, current_mat->rows);
   cv::Mat output_mat = mediapipe::formats::MatView(output_frame.get());
   output_mat.setTo(cv::Scalar(0));
 

@@ -233,8 +233,8 @@ absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
   // Write directly to the first channel of output.
   for (int i = 0; i < output_mat.rows; ++i) {
     float* out_ptr = output_mat.ptr<float>(i);
-    const float* curr_ptr = current_mat.ptr<float>(i);
-    const float* prev_ptr = previous_mat.ptr<float>(i);
+    const float* curr_ptr = current_mat->ptr<float>(i);
+    const float* prev_ptr = previous_mat->ptr<float>(i);
     for (int j = 0; j < output_mat.cols; ++j) {
       const float new_mask_value = curr_ptr[j];
       const float prev_mask_value = prev_ptr[j];
|
|||
ASSERT_EQ(1, output_packets.size());
|
||||
|
||||
Image result_image = output_packets[0].Get<Image>();
|
||||
cv::Mat result_mat = formats::MatView(&result_image);
|
||||
result_mat.copyTo(*result);
|
||||
auto result_mat = formats::MatView(&result_image);
|
||||
result_mat->copyTo(*result);
|
||||
|
||||
// Fully close graph at end, otherwise calculator+Images are destroyed
|
||||
// after calling WaitUntilDone().
|
||||
|
@ -135,10 +135,10 @@ void RunTest(bool use_gpu, float mix_ratio, cv::Mat& test_result) {
|
|||
|
||||
Packet curr_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
|
||||
ImageFormat::VEC32F1, curr_mat.size().width, curr_mat.size().height));
|
||||
curr_mat.copyTo(formats::MatView(&(curr_packet.Get<Image>())));
|
||||
curr_mat.copyTo(*formats::MatView(&(curr_packet.Get<Image>())));
|
||||
Packet prev_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
|
||||
ImageFormat::VEC32F1, prev_mat.size().width, prev_mat.size().height));
|
||||
prev_mat.copyTo(formats::MatView(&(prev_packet.Get<Image>())));
|
||||
prev_mat.copyTo(*formats::MatView(&(prev_packet.Get<Image>())));
|
||||
|
||||
cv::Mat result;
|
||||
RunGraph(curr_packet, prev_packet, use_gpu, mix_ratio, &result);
|
||||
|
|
|
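A hedged sketch of the API shape these call sites now adapt to (header path assumed): formats::MatView on a mediapipe::Image returns a shared cv::Mat handle rather than a cv::Mat by value, so callers dereference it; MatView on an ImageFrame* is unchanged.

```
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_opencv.h"  // assumed header for Image MatView

void InspectMask(const mediapipe::Image& mask) {
  // Shared handle keeps the underlying pixel buffer mapped while in use.
  auto mat = mediapipe::formats::MatView(&mask);
  const float* row0 = mat->ptr<float>(0);  // pointer-style access
  cv::Mat copy;
  mat->copyTo(copy);                       // dereference for value-style APIs
  (void)row0;
}
```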
@@ -84,14 +84,15 @@ cc_library(
     tags = ["nomac"],  # config problem with cpuinfo via TF
     deps = [
         "inference_calculator_interface",
-        "//mediapipe/framework/deps:file_path",
         "//mediapipe/gpu:gl_calculator_helper",
-        "//mediapipe/gpu:gpu_buffer",
         "//mediapipe/util/tflite:config",
-        "//mediapipe/util/tflite:tflite_gpu_runner",
         "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/status",
         "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
         "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
+        "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
+        "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
+        "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
     ],
     alwayslink = 1,
 )

@@ -154,7 +155,7 @@ cc_library(
 
 cc_library(
     name = "inference_calculator_gl_if_compute_shader_available",
-    deps = select({
+    deps = selects.with_or({
         ":compute_shader_unavailable": [],
         "//conditions:default": [":inference_calculator_gl"],
     }),

@@ -303,7 +304,7 @@ cc_library(
         "//mediapipe/framework/formats:tensor",
         "//mediapipe/framework/formats/object_detection:anchor_cc_proto",
         "//mediapipe/framework/port:ret_check",
-    ] + select({
+    ] + selects.with_or({
         ":compute_shader_unavailable": [],
         "//conditions:default": [":tensors_to_detections_calculator_gpu_deps"],
     }),

@@ -560,7 +561,7 @@ cc_library(
 
 cc_library(
     name = "image_to_tensor_calculator_gpu_deps",
-    deps = select({
+    deps = selects.with_or({
         "//mediapipe:android": [
             ":image_to_tensor_converter_gl_buffer",
             "//mediapipe/gpu:gl_calculator_helper",

@@ -684,7 +685,7 @@ cc_library(
     name = "image_to_tensor_converter_gl_buffer",
     srcs = ["image_to_tensor_converter_gl_buffer.cc"],
     hdrs = ["image_to_tensor_converter_gl_buffer.h"],
-    deps = ["//mediapipe/framework:port"] + select({
+    deps = ["//mediapipe/framework:port"] + selects.with_or({
         "//mediapipe:apple": [],
         "//conditions:default": [
             ":image_to_tensor_converter",
@@ -49,7 +49,6 @@
 #include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"
 #include "mediapipe/gpu/gl_calculator_helper.h"
 #endif  // MEDIAPIPE_METAL_ENABLED
-
 #endif  // !MEDIAPIPE_DISABLE_GPU
 
 namespace mediapipe {

@@ -142,11 +141,24 @@ class ImageToTensorCalculator : public Node {
     const auto& options =
         cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
 
-    RET_CHECK(options.has_output_tensor_float_range())
+    RET_CHECK(options.has_output_tensor_float_range() ||
+              options.has_output_tensor_int_range())
         << "Output tensor range is required.";
+    if (options.has_output_tensor_float_range()) {
       RET_CHECK_LT(options.output_tensor_float_range().min(),
                    options.output_tensor_float_range().max())
-        << "Valid output tensor range is required.";
+          << "Valid output float tensor range is required.";
+    }
+    if (options.has_output_tensor_int_range()) {
+      RET_CHECK_LT(options.output_tensor_int_range().min(),
+                   options.output_tensor_int_range().max())
+          << "Valid output int tensor range is required.";
+      RET_CHECK_GE(options.output_tensor_int_range().min(), 0)
+          << "The minimum of the output int tensor range must be non-negative.";
+      RET_CHECK_LE(options.output_tensor_int_range().max(), 255)
+          << "The maximum of the output int tensor range must be less than or "
+             "equal to 255.";
+    }
     RET_CHECK_GT(options.output_tensor_width(), 0)
         << "Valid output tensor width is required.";
     RET_CHECK_GT(options.output_tensor_height(), 0)
@ -175,9 +187,15 @@ class ImageToTensorCalculator : public Node {
|
|||
options_ = cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
|
||||
output_width_ = options_.output_tensor_width();
|
||||
output_height_ = options_.output_tensor_height();
|
||||
range_min_ = options_.output_tensor_float_range().min();
|
||||
range_max_ = options_.output_tensor_float_range().max();
|
||||
|
||||
is_int_output_ = options_.has_output_tensor_int_range();
|
||||
range_min_ =
|
||||
is_int_output_
|
||||
? static_cast<float>(options_.output_tensor_int_range().min())
|
||||
: options_.output_tensor_float_range().min();
|
||||
range_max_ =
|
||||
is_int_output_
|
||||
? static_cast<float>(options_.output_tensor_int_range().max())
|
||||
: options_.output_tensor_float_range().max();
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@@ -225,7 +243,7 @@ class ImageToTensorCalculator : public Node {
     }

     // Lazy initialization of the GPU or CPU converter.
-    MP_RETURN_IF_ERROR(InitConverterIfNecessary(cc, image->UsesGpu()));
+    MP_RETURN_IF_ERROR(InitConverterIfNecessary(cc, *image.get()));

     ASSIGN_OR_RETURN(Tensor tensor,
                      (image->UsesGpu() ? gpu_converter_ : cpu_converter_)

@@ -283,9 +301,15 @@ class ImageToTensorCalculator : public Node {
     }
   }

-  absl::Status InitConverterIfNecessary(CalculatorContext* cc, bool use_gpu) {
+  absl::Status InitConverterIfNecessary(CalculatorContext* cc,
+                                        const Image& image) {
     // Lazy initialization of the GPU or CPU converter.
-    if (use_gpu) {
+    if (image.UsesGpu()) {
+      if (is_int_output_) {
+        return absl::UnimplementedError(
+            "ImageToTensorConverter for the input GPU image currently doesn't "
+            "support quantization.");
+      }
       if (!gpu_converter_) {
 #if !MEDIAPIPE_DISABLE_GPU
 #if MEDIAPIPE_METAL_ENABLED

@@ -296,9 +320,17 @@ class ImageToTensorCalculator : public Node {
                          CreateImageToGlBufferTensorConverter(
                              cc, DoesGpuInputStartAtBottom(), GetBorderMode()));
 #else
-        ASSIGN_OR_RETURN(gpu_converter_,
+        // Check whether the underlying storage object is a GL texture.
+        if (image.GetGpuBuffer()
+                .internal_storage<mediapipe::GlTextureBuffer>()) {
+          ASSIGN_OR_RETURN(
+              gpu_converter_,
               CreateImageToGlTextureTensorConverter(
                   cc, DoesGpuInputStartAtBottom(), GetBorderMode()));
+        } else {
+          return absl::UnimplementedError(
+              "ImageToTensorConverter for the input GPU image is unavailable.");
+        }
 #endif  // MEDIAPIPE_METAL_ENABLED
 #endif  // !MEDIAPIPE_DISABLE_GPU
       }

@@ -306,7 +338,10 @@ class ImageToTensorCalculator : public Node {
       if (!cpu_converter_) {
 #if !MEDIAPIPE_DISABLE_OPENCV
         ASSIGN_OR_RETURN(cpu_converter_,
-                         CreateOpenCvConverter(cc, GetBorderMode()));
+                         CreateOpenCvConverter(
+                             cc, GetBorderMode(),
+                             is_int_output_ ? Tensor::ElementType::kUInt8
+                                            : Tensor::ElementType::kFloat32));
 #else
         LOG(FATAL) << "Cannot create image to tensor opencv converter since "
                       "MEDIAPIPE_DISABLE_OPENCV is defined.";

@@ -321,6 +356,7 @@ class ImageToTensorCalculator : public Node {
   mediapipe::ImageToTensorCalculatorOptions options_;
   int output_width_ = 0;
   int output_height_ = 0;
+  bool is_int_output_ = false;
   float range_min_ = 0.0f;
   float range_max_ = 1.0f;
 };
@@ -31,6 +31,14 @@ message ImageToTensorCalculatorOptions {
     optional float max = 2;
   }

+  // Range of int values [min, max].
+  // min must be strictly less than max.
+  // Please note that IntRange is supported for CPU tensors only.
+  message IntRange {
+    optional int64 min = 1;
+    optional int64 max = 2;
+  }
+
   // Pixel extrapolation methods. See @border_mode.
   enum BorderMode {
     BORDER_UNSPECIFIED = 0;

@@ -49,6 +57,7 @@ message ImageToTensorCalculatorOptions {
   // Output tensor element range/type image pixels are converted to.
   oneof range {
     FloatRange output_tensor_float_range = 4;
+    IntRange output_tensor_int_range = 7;
   }

   // For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
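To make the new option concrete, here is a minimal graph-config sketch requesting quantized CPU output from ImageToTensorCalculator. It is assembled from the options proto above and the test harness below, not taken from this commit; the IMAGE/TENSORS stream tags and the 224x224 size are assumptions.

node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:input_image"
  output_stream: "TENSORS:tensors"
  options {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      # uint8 output; per the comment above this works for CPU tensors only,
      # and the calculator requires 0 <= min < max <= 255.
      output_tensor_int_range { min: 0 max: 255 }
    }
  }
}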
@@ -61,7 +61,8 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
                                  float range_max, int tensor_width,
                                  int tensor_height, bool keep_aspect,
                                  absl::optional<BorderMode> border_mode,
-                                 const mediapipe::NormalizedRect& roi) {
+                                 const mediapipe::NormalizedRect& roi,
+                                 bool output_int_tensor) {
   std::string border_mode_str;
   if (border_mode) {
     switch (*border_mode) {

@@ -73,6 +74,21 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
         break;
     }
   }
+  std::string output_tensor_range;
+  if (output_int_tensor) {
+    output_tensor_range = absl::Substitute(R"(output_tensor_int_range {
+                                                min: $0
+                                                max: $1
+                                              })",
+                                           static_cast<int>(range_min),
+                                           static_cast<int>(range_max));
+  } else {
+    output_tensor_range = absl::Substitute(R"(output_tensor_float_range {
+                                                min: $0
+                                                max: $1
+                                              })",
+                                           range_min, range_max);
+  }
   auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
       absl::Substitute(R"(
         input_stream: "input_image"

@@ -86,22 +102,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
             [mediapipe.ImageToTensorCalculatorOptions.ext] {
               output_tensor_width: $0
               output_tensor_height: $1
-              keep_aspect_ratio: $4
-              output_tensor_float_range {
-                min: $2
-                max: $3
-              }
-              $5 # border mode
+              keep_aspect_ratio: $2
+              $3 # output range
+              $4 # border mode
             }
           }
         }
       )",
       /*$0=*/tensor_width,
       /*$1=*/tensor_height,
-      /*$2=*/range_min,
-      /*$3=*/range_max,
-      /*$4=*/keep_aspect ? "true" : "false",
-      /*$5=*/border_mode_str));
+      /*$2=*/keep_aspect ? "true" : "false",
+      /*$3=*/output_tensor_range,
+      /*$4=*/border_mode_str));

   std::vector<Packet> output_packets;
   tool::AddVectorSink("tensor", &graph_config, &output_packets);

@@ -126,11 +138,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
   ASSERT_THAT(tensor_vec, testing::SizeIs(1));

   const Tensor& tensor = tensor_vec[0];
-  EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);

   auto view = tensor.GetCpuReadView();
-  cv::Mat tensor_mat(tensor_height, tensor_width, CV_32FC3,
-                     const_cast<float*>(view.buffer<float>()));
+  cv::Mat tensor_mat;
+  if (output_int_tensor) {
+    EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kUInt8);
+    tensor_mat = cv::Mat(tensor_height, tensor_width, CV_8UC3,
+                         const_cast<uint8*>(view.buffer<uint8>()));
+  } else {
+    EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
+    tensor_mat = cv::Mat(tensor_height, tensor_width, CV_32FC3,
+                         const_cast<float*>(view.buffer<float>()));
+  }

   cv::Mat result_rgb;
   auto transformation =
       GetValueRangeTransformation(range_min, range_max, 0.0f, 255.0f).value();

@@ -170,16 +189,26 @@ enum class InputType { kImageFrame, kImage };
 const std::vector<InputType> kInputTypesToTest = {InputType::kImageFrame,
                                                   InputType::kImage};

-void RunTest(cv::Mat input, cv::Mat expected_result, float range_min,
-             float range_max, int tensor_width, int tensor_height,
-             bool keep_aspect, absl::optional<BorderMode> border_mode,
+void RunTest(cv::Mat input, cv::Mat expected_result,
+             std::vector<float> float_range, std::vector<int> int_range,
+             int tensor_width, int tensor_height, bool keep_aspect,
+             absl::optional<BorderMode> border_mode,
              const mediapipe::NormalizedRect& roi) {
+  ASSERT_EQ(2, float_range.size());
+  ASSERT_EQ(2, int_range.size());
   for (auto input_type : kInputTypesToTest) {
     RunTestWithInputImagePacket(
         input_type == InputType::kImageFrame ? MakeImageFramePacket(input)
                                              : MakeImagePacket(input),
-        expected_result, range_min, range_max, tensor_width, tensor_height,
-        keep_aspect, border_mode, roi);
+        expected_result, float_range[0], float_range[1], tensor_width,
+        tensor_height, keep_aspect, border_mode, roi,
+        /*output_int_tensor=*/false);
+    RunTestWithInputImagePacket(
+        input_type == InputType::kImageFrame ? MakeImageFramePacket(input)
+                                             : MakeImagePacket(input),
+        expected_result, int_range[0], int_range[1], tensor_width,
+        tensor_height, keep_aspect, border_mode, roi,
+        /*output_int_tensor=*/true);
   }
 }
@@ -195,8 +224,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspect) {
              "tensor/testdata/image_to_tensor/input.jpg"),
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
       /*border mode*/ {}, roi);
 }

@@ -213,8 +242,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectBorderZero) {
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "medium_sub_rect_keep_aspect_border_zero.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
       BorderMode::kZero, roi);
 }

@@ -231,7 +260,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectWithRotation) {
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "medium_sub_rect_keep_aspect_with_rotation.png"),
-      /*range_min=*/0.0f, /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
       BorderMode::kReplicate, roi);
 }

@@ -249,7 +279,8 @@ TEST(ImageToTensorCalculatorTest,
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "medium_sub_rect_keep_aspect_with_rotation_border_zero.png"),
-      /*range_min=*/0.0f, /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
       BorderMode::kZero, roi);
 }

@@ -267,8 +298,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotation) {
       GetRgb(
           "/mediapipe/calculators/"
           "tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"),
-      /*range_min=*/-1.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{-1.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false,
       BorderMode::kReplicate, roi);
 }

@@ -285,8 +316,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotationBorderZero) {
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "medium_sub_rect_with_rotation_border_zero.png"),
-      /*range_min=*/-1.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{-1.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false,
       BorderMode::kZero, roi);
 }

@@ -302,8 +333,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRect) {
              "tensor/testdata/image_to_tensor/input.jpg"),
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/large_sub_rect.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
       BorderMode::kReplicate, roi);
 }

@@ -320,8 +351,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectBorderZero) {
              "tensor/testdata/image_to_tensor/input.jpg"),
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
       BorderMode::kZero, roi);
 }

@@ -338,8 +369,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspect) {
              "tensor/testdata/image_to_tensor/input.jpg"),
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
       BorderMode::kReplicate, roi);
 }

@@ -356,8 +387,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectBorderZero) {
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "large_sub_rect_keep_aspect_border_zero.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
       BorderMode::kZero, roi);
 }

@@ -374,8 +405,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectWithRotation) {
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "large_sub_rect_keep_aspect_with_rotation.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
       /*border_mode=*/{}, roi);
 }

@@ -393,8 +424,8 @@ TEST(ImageToTensorCalculatorTest,
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/"
              "large_sub_rect_keep_aspect_with_rotation_border_zero.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
       /*border_mode=*/BorderMode::kZero, roi);
 }

@@ -410,8 +441,8 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRange) {
              "tensor/testdata/image_to_tensor/input.jpg"),
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/noop_except_range.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
       BorderMode::kReplicate, roi);
 }

@@ -427,8 +458,8 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRangeBorderZero) {
              "tensor/testdata/image_to_tensor/input.jpg"),
       GetRgb("/mediapipe/calculators/"
              "tensor/testdata/image_to_tensor/noop_except_range.png"),
-      /*range_min=*/0.0f,
-      /*range_max=*/1.0f,
+      /*float_range=*/{0.0f, 1.0f},
+      /*int_range=*/{0, 255},
       /*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
       BorderMode::kZero, roi);
 }
@@ -16,7 +16,7 @@
 #include "mediapipe/framework/port.h"

-#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #include <array>
 #include <memory>

@@ -339,4 +339,4 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
 }  // namespace mediapipe

-#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

@@ -17,7 +17,7 @@
 #include "mediapipe/framework/port.h"

-#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #include <memory>

@@ -37,6 +37,6 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
 }  // namespace mediapipe

-#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_

@@ -2,7 +2,7 @@
 #include "mediapipe/framework/port.h"

-#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #include <array>
 #include <memory>

@@ -85,4 +85,4 @@ bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
 }  // namespace mediapipe

-#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

@@ -3,7 +3,7 @@
 #include "mediapipe/framework/port.h"

-#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #include <array>
 #include <memory>

@@ -40,6 +40,6 @@ bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context);
 }  // namespace mediapipe

-#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_UTILS_H_

@@ -1,6 +1,6 @@
 #include "mediapipe/framework/port.h"

-#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

 #include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_utils.h"
 #include "mediapipe/framework/port/gtest.h"

@@ -46,4 +46,4 @@ TEST(ImageToTensorConverterGlUtilsTest, GlTexParameteriOverrider) {
 }  // namespace
 }  // namespace mediapipe

-#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
+#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
@@ -35,7 +35,8 @@ namespace {
 class OpenCvProcessor : public ImageToTensorConverter {
  public:
-  OpenCvProcessor(BorderMode border_mode) {
+  OpenCvProcessor(BorderMode border_mode, Tensor::ElementType tensor_type)
+      : tensor_type_(tensor_type) {
     switch (border_mode) {
       case BorderMode::kReplicate:
         border_mode_ = cv::BORDER_REPLICATE;

@@ -44,6 +45,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
         border_mode_ = cv::BORDER_CONSTANT;
         break;
     }
+    mat_type_ = tensor_type == Tensor::ElementType::kUInt8 ? CV_8UC3 : CV_32FC3;
   }

   absl::StatusOr<Tensor> Convert(const mediapipe::Image& input,

@@ -56,15 +58,20 @@ class OpenCvProcessor : public ImageToTensorConverter {
           absl::StrCat("Only RGBA/RGB formats are supported, passed format: ",
                        static_cast<uint32_t>(input.image_format())));
     }
-    cv::Mat src = mediapipe::formats::MatView(&input);
+    auto src = mediapipe::formats::MatView(&input);

     constexpr int kNumChannels = 3;
-    Tensor tensor(
-        Tensor::ElementType::kFloat32,
-        Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});
+    Tensor tensor(tensor_type_, Tensor::Shape{1, output_dims.height,
+                                              output_dims.width, kNumChannels});
     auto buffer_view = tensor.GetCpuWriteView();
-    cv::Mat dst(output_dims.height, output_dims.width, CV_32FC3,
-                buffer_view.buffer<float>());
+    cv::Mat dst;
+    if (tensor_type_ == Tensor::ElementType::kUInt8) {
+      dst = cv::Mat(output_dims.height, output_dims.width, mat_type_,
+                    buffer_view.buffer<uint8>());
+    } else {
+      dst = cv::Mat(output_dims.height, output_dims.width, mat_type_,
+                    buffer_view.buffer<float>());
+    }

     const cv::RotatedRect rotated_rect(cv::Point2f(roi.center_x, roi.center_y),
                                        cv::Size2f(roi.width, roi.height),

@@ -85,7 +92,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
     cv::Mat projection_matrix =
         cv::getPerspectiveTransform(src_points, dst_points);
     cv::Mat transformed;
-    cv::warpPerspective(src, transformed, projection_matrix,
+    cv::warpPerspective(*src, transformed, projection_matrix,
                         cv::Size(dst_width, dst_height),
                         /*flags=*/cv::INTER_LINEAR,
                         /*borderMode=*/border_mode_);

@@ -102,19 +109,22 @@ class OpenCvProcessor : public ImageToTensorConverter {
         auto transform,
         GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
                                     range_min, range_max));
-    transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset);
+    transformed.convertTo(dst, mat_type_, transform.scale, transform.offset);
     return tensor;
   }

  private:
   enum cv::BorderTypes border_mode_;
+  Tensor::ElementType tensor_type_;
+  int mat_type_;
 };

 }  // namespace

 absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
-    CalculatorContext* cc, BorderMode border_mode) {
-  return absl::make_unique<OpenCvProcessor>(border_mode);
+    CalculatorContext* cc, BorderMode border_mode,
+    Tensor::ElementType tensor_type) {
+  return absl::make_unique<OpenCvProcessor>(border_mode, tensor_type);
 }

 }  // namespace mediapipe

@@ -25,7 +25,8 @@ namespace mediapipe {
 // Creates OpenCV image-to-tensor converter.
 absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
-    CalculatorContext* cc, BorderMode border_mode);
+    CalculatorContext* cc, BorderMode border_mode,
+    Tensor::ElementType tensor_type);

 }  // namespace mediapipe
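A worked example of the range mapping consumed by convertTo() above may help. This assumes GetValueRangeTransformation returns a plain linear scale/offset pair from the [0, 255] input pixel range to [range_min, range_max]; the numbers below are illustrative, not from this commit.

  // For a float range of [-1, 1]:
  //   scale  = (range_max - range_min) / 255.0f = 2.0f / 255.0f (about 0.00784f)
  //   offset = range_min                        = -1.0f
  // so pixel 0 maps to -1.0f and pixel 255 maps to 1.0f. For an int range of
  // [0, 255] the map degenerates to scale = 1, offset = 0, which is why the
  // kUInt8 path can write pixel values through essentially unchanged.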
@@ -36,6 +36,7 @@ class InferenceCalculatorSelectorImpl
         Subgraph::GetOptions<mediapipe::InferenceCalculatorOptions>(
             subgraph_node);
     std::vector<absl::string_view> impls;

     const bool should_use_gpu =
         !options.has_delegate() ||  // Use GPU delegate if not specified
         (options.has_delegate() && options.delegate().has_gpu());
@@ -81,6 +81,7 @@ class InferenceCalculatorCpuImpl
   Packet<TfLiteModelPtr> model_packet_;
   std::unique_ptr<tflite::Interpreter> interpreter_;
   TfLiteDelegatePtr delegate_;
+  bool has_quantized_input_;
 };

 absl::Status InferenceCalculatorCpuImpl::UpdateContract(

@@ -109,11 +110,19 @@ absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) {
   for (int i = 0; i < input_tensors.size(); ++i) {
     const Tensor* input_tensor = &input_tensors[i];
     auto input_tensor_view = input_tensor->GetCpuReadView();
+    if (has_quantized_input_) {
+      // TODO: Support more quantized tensor types.
+      auto input_tensor_buffer = input_tensor_view.buffer<uint8>();
+      uint8* local_tensor_buffer = interpreter_->typed_input_tensor<uint8>(i);
+      std::memcpy(local_tensor_buffer, input_tensor_buffer,
+                  input_tensor->bytes());
+    } else {
       auto input_tensor_buffer = input_tensor_view.buffer<float>();
       float* local_tensor_buffer = interpreter_->typed_input_tensor<float>(i);
       std::memcpy(local_tensor_buffer, input_tensor_buffer,
                   input_tensor->bytes());
+    }
   }

   // Run inference.
   RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);

@@ -167,10 +176,9 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegateAndAllocateTensors(
   // AllocateTensors() can be called only after ModifyGraphWithDelegate.
   RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
-  // TODO: Support quantized tensors.
-  RET_CHECK_NE(
-      interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
-      kTfLiteAffineQuantization);
+  has_quantized_input_ =
+      interpreter_->tensor(interpreter_->inputs()[0])->quantization.type ==
+      kTfLiteAffineQuantization;
   return absl::OkStatus();
 }

@@ -226,7 +234,7 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
 #endif  // defined(__EMSCRIPTEN__)

   if (use_xnnpack) {
-    TfLiteXNNPackDelegateOptions xnnpack_opts{};
+    auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
     xnnpack_opts.num_threads =
         GetXnnpackNumThreads(opts_has_delegate, opts_delegate);
     delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
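A note on the XNNPACK change above: value-initializing TfLiteXNNPackDelegateOptions with {} zeroes every field, whereas the library's TfLiteXNNPackDelegateOptionsDefault() helper fills in the delegate's intended defaults, so newly added option fields keep sane values as TFLite evolves. A minimal sketch of the pattern, with an illustrative thread count standing in for the value the calculator actually derives via GetXnnpackNumThreads:

  // Start from the library defaults rather than a zeroed struct, then
  // override only the fields this calculator cares about.
  auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
  xnnpack_opts.num_threads = 4;  // illustrative; normally from calculator options
  delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
                                &TfLiteXNNPackDelegateDelete);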
@@ -154,8 +154,9 @@ TEST_P(InferenceCalculatorTest, TestFaceDetection) {
       detection_packets[0].Get<std::vector<Detection>>();
 #if !defined(MEDIAPIPE_PROTO_LITE)
   // Approximately is not available with lite protos (b/178137094).
-  EXPECT_THAT(dets,
-              ElementsAre(Approximately(EqualsProto(expected_detection))));
+  constexpr float kEpsilon = 0.001;
+  EXPECT_THAT(dets, ElementsAre(Approximately(EqualsProto(expected_detection),
+                                              kEpsilon)));
 #endif
 }
@@ -59,8 +59,6 @@ class InferenceCalculatorGlImpl
   // TfLite requires us to keep the model alive as long as the interpreter is.
   Packet<TfLiteModelPtr> model_packet_;
-  std::unique_ptr<tflite::Interpreter> interpreter_;
-  TfLiteDelegatePtr delegate_;

 #if MEDIAPIPE_TFLITE_GL_INFERENCE
   mediapipe::GlCalculatorHelper gpu_helper_;

@@ -72,6 +70,9 @@ class InferenceCalculatorGlImpl
       tflite_gpu_runner_usage_;
 #endif  // MEDIAPIPE_TFLITE_GL_INFERENCE

+  TfLiteDelegatePtr delegate_;
+  std::unique_ptr<tflite::Interpreter> interpreter_;
+
 #if MEDIAPIPE_TFLITE_GPU_SUPPORTED
   std::vector<Tensor::Shape> output_shapes_;
   std::vector<std::unique_ptr<Tensor>> gpu_buffers_in_;

@@ -252,12 +253,17 @@ absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
     MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
       gpu_buffers_in_.clear();
       gpu_buffers_out_.clear();
+      // Delegate must outlive the interpreter, hence the order is important.
+      interpreter_ = nullptr;
+      delegate_ = nullptr;
       return absl::OkStatus();
     }));
-  }
-
-  // Delegate must outlive the interpreter, hence the order is important.
-  interpreter_ = nullptr;
-  delegate_ = nullptr;
+  } else {
+    // Delegate must outlive the interpreter, hence the order is important.
+    interpreter_ = nullptr;
+    delegate_ = nullptr;
+  }
+
   return absl::OkStatus();
 }
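The reordering above encodes a lifetime rule: the TFLite interpreter holds references into the delegate, so the interpreter must be torn down first. Declaring delegate_ before interpreter_ makes implicit member destruction run in the right order (C++ destroys members in reverse declaration order), and the explicit resets in Close() follow the same discipline. A stripped-down sketch, using the same member types as the calculator:

  class Inference {
    // Declared first, therefore destroyed last: the delegate outlives the
    // interpreter that references it.
    TfLiteDelegatePtr delegate_;
    std::unique_ptr<tflite::Interpreter> interpreter_;

   public:
    void Close() {
      interpreter_ = nullptr;  // drop the interpreter first
      delegate_ = nullptr;     // then the delegate it depended on
    }
  };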
@@ -266,6 +266,7 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU(
   auto raw_box_tensor = &input_tensors[0];
   RET_CHECK_EQ(raw_box_tensor->shape().dims.size(), 3);
   RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1);
+  RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
   RET_CHECK_EQ(raw_box_tensor->shape().dims[1], num_boxes_);
   RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_coords_);
   auto raw_score_tensor = &input_tensors[1];

@@ -385,6 +386,7 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
     CalculatorContext* cc, std::vector<Detection>* output_detections) {
   const auto& input_tensors = *kInTensors(cc);
   RET_CHECK_GE(input_tensors.size(), 2);
+  RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
 #ifndef MEDIAPIPE_DISABLE_GL_COMPUTE

   MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &input_tensors, &cc,

@@ -563,7 +565,6 @@ absl::Status TensorsToDetectionsCalculator::LoadOptions(CalculatorContext* cc) {
   // Get calculator options specified in the graph.
   options_ = cc->Options<::mediapipe::TensorsToDetectionsCalculatorOptions>();
   RET_CHECK(options_.has_num_classes());
-  RET_CHECK(options_.has_num_boxes());
   RET_CHECK(options_.has_num_coords());

   num_classes_ = options_.num_classes();

@@ -355,9 +355,10 @@ absl::Status TensorsToSegmentationCalculator::ProcessCpu(
   std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
       ImageFormat::VEC32F1, output_width, output_height);
   std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame);
-  cv::Mat output_mat = formats::MatView(output_mask.get());
+  auto output_mat = formats::MatView(output_mask.get());
   // Upsample small mask into output.
-  cv::resize(small_mask_mat, output_mat, cv::Size(output_width, output_height));
+  cv::resize(small_mask_mat, *output_mat,
+             cv::Size(output_width, output_height));
   cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());

   return absl::OkStatus();
@@ -334,6 +334,7 @@ cc_library(
         ":image_frame_to_tensor_calculator_cc_proto",
         "//mediapipe/framework:calculator_framework",
         "//mediapipe/framework/formats:image_frame",
+        "//mediapipe/framework/port:core_proto",
         "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
     ] + select({

@@ -17,6 +17,7 @@
 #include "mediapipe/calculators/tensorflow/image_frame_to_tensor_calculator.pb.h"
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/formats/image_frame.h"
+#include "mediapipe/framework/port/proto_ns.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status.h"
 #include "mediapipe/framework/port/status_macros.h"

@@ -32,7 +33,10 @@ namespace {
 // Convert the ImageFrame into Tensor with floating point value type.
 // The value will be normalized based on mean and stddev.
 std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor(
-    const ImageFrame& image_frame, float mean, float stddev) {
+    // const ImageFrame& image_frame, float mean, float stddev) {
+    const ImageFrame& image_frame,
+    const mediapipe::proto_ns::RepeatedField<float>& mean,
+    const mediapipe::proto_ns::RepeatedField<float>& stddev) {
   const int cols = image_frame.Width();
   const int rows = image_frame.Height();
   const int channels = image_frame.NumberOfChannels();

@@ -45,7 +49,20 @@ std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor(
   for (int row = 0; row < rows; ++row) {
     for (int col = 0; col < cols; ++col) {
       for (int channel = 0; channel < channels; ++channel) {
-        tensor_data(row, col, channel) = (pixel[channel] - mean) / stddev;
+        float mean_value = 0;
+        if (mean.size() > 1) {
+          mean_value = mean[channel];
+        } else if (!mean.empty()) {
+          mean_value = mean[0];
+        }
+        float stddev_value = 1;
+        if (stddev.size() > 1) {
+          stddev_value = stddev[channel];
+        } else if (!stddev.empty()) {
+          stddev_value = stddev[0];
+        }
+        tensor_data(row, col, channel) =
+            (pixel[channel] - mean_value) / stddev_value;
       }
       pixel += channels;
     }

@@ -126,7 +143,18 @@ absl::Status ImageFrameToTensorCalculator::Process(CalculatorContext* cc) {
     const tf::DataType data_type = options_.data_type();
     RET_CHECK_EQ(data_type, tf::DT_FLOAT)
         << "Unsupported data type " << data_type;
-    RET_CHECK_GT(options_.stddev(), 0.0f);
+    RET_CHECK_GT(options_.stddev().size(), 0) << "You must set a stddev.";
+    RET_CHECK_GT(options_.stddev()[0], 0.0f) << "The stddev cannot be zero.";
+    if (options_.stddev().size() > 1) {
+      RET_CHECK_EQ(options_.stddev().size(), video_frame.NumberOfChannels())
+          << "If specifying multiple stddev normalization values, "
+          << "the number must match the number of image channels.";
+    }
+    if (options_.mean().size() > 1) {
+      RET_CHECK_EQ(options_.mean().size(), video_frame.NumberOfChannels())
+          << "If specifying multiple mean normalization values, "
+          << "the number must match the number of image channels.";
+    }
     tensor = ImageFrameToNormalizedTensor(video_frame, options_.mean(),
                                           options_.stddev());
   } else {
@@ -32,6 +32,6 @@ message ImageFrameToTensorCalculatorOptions {
   // If set, the output tensor T is equal to (F - mean * J) / stddev, where F
   // and J are the input image frame and the all-ones matrix of the same size,
   // respectively. Otherwise, T is equal to F.
-  optional float mean = 2;
-  optional float stddev = 3;
+  repeated float mean = 2;
+  repeated float stddev = 3;
 }
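To make the broadcast rule concrete (a single value applies to every channel, while a per-channel list must match the channel count), this is the arithmetic the calculator performs for an SRGB pixel (0, 128, 255) with mean 128 and stddev 128 on each channel, matching the expectations in the test below:

  (  0 - 128) / 128 = -1.0
  (128 - 128) / 128 =  0.0
  (255 - 128) / 128 =  127/128, about 0.992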
@@ -454,4 +454,32 @@ TEST_F(ImageFrameToTensorCalculatorTest, FixedRGBFrameWithMeanAndStddev) {
   EXPECT_EQ(actual[2], 127.0f / 128.0f);  // (255 - 128) / 128
 }

+TEST_F(ImageFrameToTensorCalculatorTest, FixedRGBFrameWithRepeatMeanAndStddev) {
+  runner_ = ::absl::make_unique<CalculatorRunner>(
+      "ImageFrameToTensorCalculator",
+      "[mediapipe.ImageFrameToTensorCalculatorOptions.ext]"
+      "{data_type:DT_FLOAT mean:128.0 mean:128.0 mean:128.0 "
+      " stddev:128.0 stddev:128.0 stddev:128.0}",
+      1, 1, 0);
+
+  // Create a single pixel image of fixed color #0080ff.
+  auto image_frame = ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1, 1);
+  const uint8 color[] = {0, 128, 255};
+  SetToColor<uint8>(color, image_frame.get());
+
+  runner_->MutableInputs()->Index(0).packets.push_back(
+      Adopt(image_frame.release()).At(Timestamp(0)));
+  MP_ASSERT_OK(runner_->Run());
+
+  const auto& tensor = runner_->Outputs().Index(0).packets[0].Get<tf::Tensor>();
+  EXPECT_EQ(tensor.dtype(), tf::DT_FLOAT);
+  ASSERT_EQ(tensor.dims(), 3);
+  EXPECT_EQ(tensor.shape().dim_size(0), 1);
+  EXPECT_EQ(tensor.shape().dim_size(1), 1);
+  EXPECT_EQ(tensor.shape().dim_size(2), 3);
+  const float* actual = tensor.flat<float>().data();
+  EXPECT_EQ(actual[0], -1.0f);            // (  0 - 128) / 128
+  EXPECT_EQ(actual[1], 0.0f);             // (128 - 128) / 128
+  EXPECT_EQ(actual[2], 127.0f / 128.0f);  // (255 - 128) / 128
+}
 }  // namespace mediapipe
@@ -70,10 +70,10 @@ const int kNumCoordsPerBox = 4;
 // image/understanding/object_detection/export_inference_graph.py
 //
 // By default, the output Detections store label ids (integers) for each
-// detection. Optionally, a label map (of the form std::map<int, std::string>
+// detection. Optionally, a label map (of the form std::map<int, string>
 // mapping label ids to label names as strings) can be made available as an
 // input side packet, in which case the output Detections store
-// labels as their associated std::string provided by the label map.
+// labels as their associated string provided by the label map.
 //
 // Usage example:
 // node {

@@ -59,7 +59,7 @@ namespace mpms = mediapipe::mediasequence;
 // bounding boxes from vector<Detections>, and streams with the
 // "FLOAT_FEATURE_${NAME}" pattern, which stores the values from vector<float>'s
 // associated with the name ${NAME}. "KEYPOINTS" stores a map of 2D keypoints
-// from flat_hash_map<std::string, vector<pair<float, float>>>. "IMAGE_${NAME}",
+// from flat_hash_map<string, vector<pair<float, float>>>. "IMAGE_${NAME}",
 // "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store prefixed versions of
 // each stream, which allows for multiple image streams to be included. However,
 // the default names are supported by more tools.

@@ -28,7 +28,7 @@
 //   output_side_packet: "SEQUENCE_EXAMPLE:sequence_example"
 // }
 //
-// Example converting to std::string in Close():
+// Example converting to string in Close():
 // node {
 //   calculator: "StringToSequenceExampleCalculator"
 //   input_side_packet: "SEQUENCE_EXAMPLE:sequence_example"

@@ -302,9 +302,8 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
         << "To use recurrent_tag_pairs, batch_size must be 1.";
     for (const auto& tag_pair : options_.recurrent_tag_pair()) {
       const std::vector<std::string> tags = absl::StrSplit(tag_pair, ':');
-      RET_CHECK_EQ(tags.size(), 2)
-          << "recurrent_tag_pair must be a colon "
-             "separated std::string with two components: "
+      RET_CHECK_EQ(tags.size(), 2) << "recurrent_tag_pair must be a colon "
+                                      "separated string with two components: "
           << tag_pair;
       RET_CHECK(mediapipe::ContainsKey(tag_to_tensor_map_, tags[0]))
           << "Can't find tag '" << tags[0] << "' in signature "

@@ -86,7 +86,7 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase {
     cc->InputSidePackets()
         .Tag(kStringModelFilePathTag)
         .Set<std::string>(
-            // Filename of std::string model.
+            // Filename of string model.
         );
   }
   cc->OutputSidePackets()

@@ -84,7 +84,7 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator {
   } else if (input_side_packets->HasTag(kStringModelFilePathTag)) {
     input_side_packets->Tag(kStringModelFilePathTag)
         .Set<std::string>(
-            // Filename of std::string model.
+            // Filename of string model.
         );
   }
   output_side_packets->Tag(kSessionTag)

@@ -69,6 +69,8 @@ const std::string MaybeConvertSignatureToTag(
                    [](unsigned char c) { return std::toupper(c); });
     output = absl::StrReplaceAll(output, {{"/", "_"}});
+    output = absl::StrReplaceAll(output, {{"-", "_"}});
+    output = absl::StrReplaceAll(output, {{".", "_"}});
     LOG(INFO) << "Renamed TAG from: " << name << " to " << output;
     return output;
   } else {
     return name;

@@ -71,6 +71,8 @@ const std::string MaybeConvertSignatureToTag(
                    [](unsigned char c) { return std::toupper(c); });
     output = absl::StrReplaceAll(output, {{"/", "_"}});
+    output = absl::StrReplaceAll(output, {{"-", "_"}});
+    output = absl::StrReplaceAll(output, {{".", "_"}});
     LOG(INFO) << "Renamed TAG from: " << name << " to " << output;
     return output;
   } else {
     return name;

@@ -939,7 +939,7 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
 #if !defined(MEDIAPIPE_EDGE_TPU)
   if (use_xnnpack) {
-    TfLiteXNNPackDelegateOptions xnnpack_opts{};
+    auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
     xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
     delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
                                   &TfLiteXNNPackDelegateDelete);
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include <string>
 #include <vector>

 #include "absl/strings/str_format.h"

@@ -558,7 +559,7 @@ uniform ivec2 out_size;
 const int output_layer_index = int($1);
 const float combine_with_previous_ratio = float($2);

-// Will be replaced with either '#define READ_PREVIOUS' or empty std::string
+// Will be replaced with either '#define READ_PREVIOUS' or empty string
 $3 //DEFINE_READ_PREVIOUS

 void main() {

@@ -51,6 +51,7 @@ class LocalFilePatternContentsCalculator : public CalculatorBase {
         cc->InputSidePackets().Tag(kFileDirectoryTag).Get<std::string>(),
         cc->InputSidePackets().Tag(kFileSuffixTag).Get<std::string>(),
         &filenames_));
+    std::sort(filenames_.begin(), filenames_.end());
     return absl::OkStatus();
   }

@@ -129,8 +129,8 @@ TEST(PacketFrequencyCalculatorTest, MultiPacketTest) {
 // Tests packet frequency with multiple input/output streams.
 TEST(PacketFrequencyCalculatorTest, MultiStreamTest) {
   // Setup the calculator runner and provide strings as input on all streams
-  // (note that it doesn't have to be std::string; the calculator can take any
-  // type as input).
+  // (note that it doesn't have to be string; the calculator can take any type
+  // as input).
   CalculatorRunner runner(GetNodeWithMultipleStreams());

   // Packet 1 on stream 1.
@@ -37,6 +37,13 @@ RenderAnnotation::Rectangle* NewRect(
   annotation->mutable_color()->set_b(options.color().b());
   annotation->set_thickness(options.thickness());

+  if (options.has_top_left_thickness()) {
+    CHECK(!options.oval());
+    CHECK(!options.filled());
+    annotation->mutable_rectangle()->set_top_left_thickness(
+        options.top_left_thickness());
+  }
+
   return options.oval() ? options.filled()
                               ? annotation->mutable_filled_oval()
                                     ->mutable_oval()

@@ -136,6 +143,11 @@ absl::Status RectToRenderDataCalculator::Open(CalculatorContext* cc) {
   cc->SetOffset(TimestampDiff(0));

   options_ = cc->Options<RectToRenderDataCalculatorOptions>();
+  if (options_.has_top_left_thickness()) {
+    // Filled and oval don't support top_left_thickness.
+    RET_CHECK(!options_.filled());
+    RET_CHECK(!options_.oval());
+  }

   return absl::OkStatus();
 }

@@ -35,4 +35,8 @@ message RectToRenderDataCalculatorOptions {
   // Whether the rendered rectangle should be an oval.
   optional bool oval = 4 [default = false];
+
+  // Radius of top left corner circle. Only supported for oval=false,
+  // filled=false.
+  optional double top_left_thickness = 5;
 }
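For illustration, a graph node enabling the new corner emphasis might look like the sketch below. The stream tags are assumptions based on common MediaPipe usage, and recall the constraint enforced in Open(): top_left_thickness is rejected when oval or filled is set.

node {
  calculator: "RectToRenderDataCalculator"
  input_stream: "NORM_RECT:rect"
  output_stream: "RENDER_DATA:render_data"
  options {
    [mediapipe.RectToRenderDataCalculatorOptions.ext] {
      filled: false
      oval: false
      thickness: 2.0
      # Hypothetical value; draws a circle of this radius at the top-left corner.
      top_left_thickness: 8.0
    }
  }
}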
@@ -48,8 +48,8 @@ constexpr char kTopKIndexesTag[] = "TOP_K_INDEXES";
 constexpr char kScoresTag[] = "SCORES";

 // A calculator that takes a vector of scores and returns the indexes, scores,
-// labels of the top k elements, classification protos, and summary std::string
-// (in csv format).
+// labels of the top k elements, classification protos, and summary string (in
+// csv format).
 //
 // Usage example:
 // node {

@@ -76,7 +76,7 @@ constexpr char kTrackingTag[] = "TRACKING";
 //   IMAGE_SIZE: Input image dimension.
 //   TRACKED_BOXES: input box tracking result (proto TimedBoxProtoList) from
 //                  BoxTrackerCalculator.
-//   ADD_INDEX: Optional std::string containing binary format proto of type
+//   ADD_INDEX: Optional string containing binary format proto of type
 //              BoxDetectorIndex. Used for adding target index to the detector
 //              search index during runtime.
 //   CANCEL_OBJECT_ID: Optional id of box to be removed. This is recommended

@@ -91,8 +91,7 @@ constexpr char kTrackingTag[] = "TRACKING";
 //   BOXES: Optional output stream of type TimedBoxProtoList for each lost box.
 //
 // Input side packets:
-//   INDEX_PROTO_STRING: Optional std::string containing binary format proto of
-//   type
+//   INDEX_PROTO_STRING: Optional string containing binary format proto of type
 //                       BoxDetectorIndex. Used for initializing box_detector
 //                       with predefined template images.
 //   FRAME_ALIGNMENT: Optional integer to indicate alignment_boundary for

@@ -15,6 +15,7 @@
 #include <stdio.h>

 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>

@@ -78,7 +79,7 @@ const char kOptionsTag[] = "OPTIONS";
 //   TrackingData and added to current set of tracked boxes.
 //   This is recommended to be used with SyncSetInputStreamHandler.
 //   START_POS_PROTO_STRING: Same as START_POS, but is in the form of serialized
-//   protobuffer std::string. When both START_POS and
+//   protobuffer string. When both START_POS and
 //   START_POS_PROTO_STRING are present, START_POS is used. Suggest
 //   to specify only one of them.
 //   RESTART_POS: Same as START_POS, but exclusively for receiving detection

@@ -99,7 +100,7 @@ const char kOptionsTag[] = "OPTIONS";
 //   can be in arbitrary order.
 //   Use with SyncSetInputStreamHandler in streaming mode only.
 //   RA_TRACK_PROTO_STRING: Same as RA_TRACK, but is in the form of serialized
-//   protobuffer std::string. When both RA_TRACK and
+//   protobuffer string. When both RA_TRACK and
 //   RA_TRACK_PROTO_STRING are present, RA_TRACK is used. Suggest
 //   to specify only one of them.
 //
@@ -15,6 +15,7 @@
 #include <cmath>
 #include <fstream>
 #include <memory>
 #include <string>

 #include "absl/strings/numbers.h"
 #include "absl/strings/str_split.h"

@@ -79,7 +79,7 @@ ImageFormat::Format GetImageFormat(int num_channels) {
 // to be saved, specify an output side packet with tag "SAVED_AUDIO_PATH".
 // The calculator will call FFmpeg binary to save audio tracks as an aac file.
 // If the audio tracks can't be extracted by FFmpeg, the output side packet
-// will contain an empty std::string.
+// will contain an empty string.
 //
 // Example config:
 // node {

@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
@@ -10,6 +10,9 @@
     <uses-permission android:name="android.permission.CAMERA" />
     <uses-feature android:name="android.hardware.camera" />

+    <!-- For profiling -->
+    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
+
     <application
         android:allowBackup="true"
         android:icon="@mipmap/ic_launcher"

@@ -40,6 +40,7 @@ android_binary(
         "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
         "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
+        "//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
        "//mediapipe/modules/hand_landmark:handedness.txt",
        "//mediapipe/modules/holistic_landmark:hand_recrop.tflite",
        "//mediapipe/modules/pose_detection:pose_detection.tflite",

@@ -80,6 +80,7 @@ cc_library(
         "//mediapipe/framework/formats:location_data_cc_proto",
         "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
+        "@com_google_absl//absl/status",
     ],
     alwayslink = 1,
 )
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include <memory>

+#include "absl/status/status.h"
 #include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
 #include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.pb.h"
 #include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_state.h"

@@ -41,6 +42,7 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
 // Can be used to control whether an animated zoom should actually be performed
 // (configured through option us_to_first_rect). If provided, a non-zero integer
 // will allow the animated zoom to be used when the first detections arrive.
+// Applies to first detection only.
 constexpr char kAnimateZoom[] = "ANIMATE_ZOOM";
 // Can be used to control the maximum zoom; note that it is re-evaluated only
 // upon change of input resolution. A value of 100 disables zooming and is the

@@ -112,6 +114,16 @@ class ContentZoomingCalculator : public CalculatorBase {
                              int* pan_offset, int* height);
   // Sets max_frame_value_ and target_aspect_
   absl::Status UpdateAspectAndMax();
+  // Smooth camera path
+  absl::Status SmoothAndClampPath(int target_width, int target_height,
+                                  float path_width, float path_height,
+                                  float* path_offset_x, float* path_offset_y);
+  // Compute box containing all detections.
+  absl::Status GetDetectionsBox(mediapipe::CalculatorContext* cc, float* xmin,
+                                float* xmax, float* ymin, float* ymax,
+                                bool* only_required_found,
+                                bool* has_detections);

   ContentZoomingCalculatorOptions options_;
   // Detection frame width/height.
   int frame_height_;
@@ -537,68 +549,13 @@ absl::Status ContentZoomingCalculator::Process(
                      UpdateForResolutionChange(cc, frame_width, frame_height));
   }

-  bool only_required_found = false;
-
   // Compute the box that contains all "is_required" detections.
   float xmin = 1, ymin = 1, xmax = 0, ymax = 0;
-  if (cc->Inputs().HasTag(kSalientRegions)) {
-    auto detection_set = cc->Inputs().Tag(kSalientRegions).Get<DetectionSet>();
-    for (const auto& region : detection_set.detections()) {
-      if (!region.only_required()) {
-        continue;
-      }
-      only_required_found = true;
-      MP_RETURN_IF_ERROR(UpdateRanges(
-          region, options_.detection_shift_vertical(),
-          options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax));
-    }
-  }
-
-  if (cc->Inputs().HasTag(kDetections)) {
-    if (cc->Inputs().Tag(kDetections).IsEmpty()) {
-      if (last_only_required_detection_ == 0) {
-        // If no detections are available and we never had any,
-        // simply return the full-image rectangle as crop-rect.
-        if (cc->Outputs().HasTag(kCropRect)) {
-          auto default_rect = absl::make_unique<mediapipe::Rect>();
-          default_rect->set_x_center(frame_width_ / 2);
-          default_rect->set_y_center(frame_height_ / 2);
-          default_rect->set_width(frame_width_);
-          default_rect->set_height(frame_height_);
-          cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
-                                           Timestamp(cc->InputTimestamp()));
-        }
-        if (cc->Outputs().HasTag(kNormalizedCropRect)) {
-          auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
-          default_rect->set_x_center(0.5);
-          default_rect->set_y_center(0.5);
-          default_rect->set_width(1.0);
-          default_rect->set_height(1.0);
-          cc->Outputs()
-              .Tag(kNormalizedCropRect)
-              .Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
-        }
-        // Also provide a first crop rect: in this case a zero-sized one.
-        if (cc->Outputs().HasTag(kFirstCropRect)) {
-          cc->Outputs()
-              .Tag(kFirstCropRect)
-              .Add(new mediapipe::NormalizedRect(),
-                   Timestamp(cc->InputTimestamp()));
-        }
-        return absl::OkStatus();
-      }
-    } else {
-      auto raw_detections = cc->Inputs()
-                                .Tag(kDetections)
-                                .Get<std::vector<mediapipe::Detection>>();
-      for (const auto& detection : raw_detections) {
-        only_required_found = true;
-        MP_RETURN_IF_ERROR(UpdateRanges(
-            detection, options_.detection_shift_vertical(),
-            options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax));
-      }
-    }
-  }
+  bool only_required_found = false;
+  bool has_detections = true;
+  MP_RETURN_IF_ERROR(GetDetectionsBox(cc, &xmin, &xmax, &ymin, &ymax,
+                                      &only_required_found, &has_detections));
+  if (!has_detections) return absl::OkStatus();

   const bool may_start_animation = (options_.us_to_first_rect() != 0) &&
                                    (!cc->Inputs().HasTag(kAnimateZoom) ||
@@ -656,7 +613,8 @@ absl::Status ContentZoomingCalculator::Process(
     path_solver_zoom_->ClearHistory();
   }
   const bool camera_active =
-      is_animating || pan_state || tilt_state || zoom_state;
+      is_animating || ((pan_state || tilt_state || zoom_state) &&
+                       !options_.disable_animations());
   // Waiting for first rect before setting any value of the camera active flag
   // so we avoid setting it to false during initialization.
   if (cc->Outputs().HasTag(kCameraActive) &&

@@ -666,17 +624,26 @@ absl::Status ContentZoomingCalculator::Process(
         .AddPacket(MakePacket<bool>(camera_active).At(cc->InputTimestamp()));
   }

+  // Skip the path solvers to the final destination if not animating.
+  const bool disable_animations =
+      options_.disable_animations() && path_solver_zoom_->IsInitialized();
+  if (disable_animations) {
+    MP_RETURN_IF_ERROR(path_solver_zoom_->SetState(height));
+    MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(offset_y));
+    MP_RETURN_IF_ERROR(path_solver_pan_->SetState(offset_x));
+  }
+
   // Compute smoothed zoom camera path.
   MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation(
       height, cc->InputTimestamp().Microseconds()));
   float path_height;
   MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height));
-  float path_width = path_height * target_aspect_;
+  const float path_width = path_height * target_aspect_;

   // Update pixel-per-degree value for pan/tilt.
   int target_height;
   MP_RETURN_IF_ERROR(path_solver_zoom_->GetTargetPosition(&target_height));
-  int target_width = target_height * target_aspect_;
+  const int target_width = target_height * target_aspect_;
   MP_RETURN_IF_ERROR(path_solver_pan_->UpdatePixelsPerDegree(
       static_cast<float>(target_width) / kFieldOfView));
   MP_RETURN_IF_ERROR(path_solver_tilt_->UpdatePixelsPerDegree(
@@ -692,66 +659,16 @@ absl::Status ContentZoomingCalculator::Process(
   float path_offset_y;
   MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y));

-  float delta_height;
-  MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
-  int delta_width = delta_height * target_aspect_;
-
-  // Smooth centering when zooming out.
-  float remaining_width = target_width - path_width;
-  int width_space = frame_width_ - target_width;
-  if (abs(path_offset_x - frame_width_ / 2) >
-          width_space / 2 + kPixelTolerance &&
-      remaining_width > kPixelTolerance) {
-    float required_width =
-        abs(path_offset_x - frame_width_ / 2) - width_space / 2;
-    if (path_offset_x < frame_width_ / 2) {
-      path_offset_x += delta_width * (required_width / remaining_width);
-      MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
-    } else {
-      path_offset_x -= delta_width * (required_width / remaining_width);
-      MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
-    }
-  }
-
-  float remaining_height = target_height - path_height;
-  int height_space = frame_height_ - target_height;
-  if (abs(path_offset_y - frame_height_ / 2) >
-          height_space / 2 + kPixelTolerance &&
-      remaining_height > kPixelTolerance) {
-    float required_height =
-        abs(path_offset_y - frame_height_ / 2) - height_space / 2;
-    if (path_offset_y < frame_height_ / 2) {
-      path_offset_y += delta_height * (required_height / remaining_height);
-      MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
-    } else {
-      path_offset_y -= delta_height * (required_height / remaining_height);
-      MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
-    }
-  }
-
-  // Prevent box from extending beyond the image after camera smoothing.
-  if (path_offset_y - ceil(path_height / 2.0) < 0) {
-    path_offset_y = ceil(path_height / 2.0);
-    MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
-  } else if (path_offset_y + ceil(path_height / 2.0) > frame_height_) {
-    path_offset_y = frame_height_ - ceil(path_height / 2.0);
-    MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
-  }
-
-  if (path_offset_x - ceil(path_width / 2.0) < 0) {
-    path_offset_x = ceil(path_width / 2.0);
-    MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
-  } else if (path_offset_x + ceil(path_width / 2.0) > frame_width_) {
-    path_offset_x = frame_width_ - ceil(path_width / 2.0);
-    MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
-  }
-
-  // Convert to top/bottom borders to remove.
-  int path_top = path_offset_y - path_height / 2;
-  int path_bottom = frame_height_ - (path_offset_y + path_height / 2);
+  // Update path.
+  MP_RETURN_IF_ERROR(SmoothAndClampPath(target_width, target_height, path_width,
+                                        path_height, &path_offset_x,
+                                        &path_offset_y));

   // Transmit result downstream to scenecroppingcalculator.
   if (cc->Outputs().HasTag(kDetectedBorders)) {
+    // Convert to top/bottom borders to remove.
+    const int path_top = path_offset_y - path_height / 2;
+    const int path_bottom = frame_height_ - (path_offset_y + path_height / 2);
     std::unique_ptr<StaticFeatures> features =
         absl::make_unique<StaticFeatures>();
     MakeStaticFeatures(path_top, path_bottom, frame_width_, frame_height_,
@@ -798,8 +715,8 @@ absl::Status ContentZoomingCalculator::Process(
   if (cc->Outputs().HasTag(kNormalizedCropRect)) {
     std::unique_ptr<mediapipe::NormalizedRect> gpu_rect =
         absl::make_unique<mediapipe::NormalizedRect>();
-    float float_frame_width = static_cast<float>(frame_width_);
-    float float_frame_height = static_cast<float>(frame_height_);
+    const float float_frame_width = static_cast<float>(frame_width_);
+    const float float_frame_height = static_cast<float>(frame_height_);
     if (is_animating) {
       auto rect =
           GetAnimationRect(frame_width, frame_height, cc->InputTimestamp());
@@ -829,5 +746,130 @@ absl::Status ContentZoomingCalculator::Process(
   return absl::OkStatus();
 }
 
+absl::Status ContentZoomingCalculator::SmoothAndClampPath(
+    int target_width, int target_height, float path_width, float path_height,
+    float* path_offset_x, float* path_offset_y) {
+  float delta_height;
+  MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
+  const int delta_width = delta_height * target_aspect_;
+
+  // Smooth centering when zooming out.
+  const float remaining_width = target_width - path_width;
+  const int width_space = frame_width_ - target_width;
+  if (abs(*path_offset_x - frame_width_ / 2) >
+          width_space / 2 + kPixelTolerance &&
+      remaining_width > kPixelTolerance) {
+    const float required_width =
+        abs(*path_offset_x - frame_width_ / 2) - width_space / 2;
+    if (*path_offset_x < frame_width_ / 2) {
+      *path_offset_x += delta_width * (required_width / remaining_width);
+      MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
+    } else {
+      *path_offset_x -= delta_width * (required_width / remaining_width);
+      MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
+    }
+  }
+
+  const float remaining_height = target_height - path_height;
+  const int height_space = frame_height_ - target_height;
+  if (abs(*path_offset_y - frame_height_ / 2) >
+          height_space / 2 + kPixelTolerance &&
+      remaining_height > kPixelTolerance) {
+    const float required_height =
+        abs(*path_offset_y - frame_height_ / 2) - height_space / 2;
+    if (*path_offset_y < frame_height_ / 2) {
+      *path_offset_y += delta_height * (required_height / remaining_height);
+      MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
+    } else {
+      *path_offset_y -= delta_height * (required_height / remaining_height);
+      MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
+    }
+  }
+
+  // Prevent box from extending beyond the image after camera smoothing.
+  if (*path_offset_y - ceil(path_height / 2.0) < 0) {
+    *path_offset_y = ceil(path_height / 2.0);
+    MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
+  } else if (*path_offset_y + ceil(path_height / 2.0) > frame_height_) {
+    *path_offset_y = frame_height_ - ceil(path_height / 2.0);
+    MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
+  }
+
+  if (*path_offset_x - ceil(path_width / 2.0) < 0) {
+    *path_offset_x = ceil(path_width / 2.0);
+    MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
+  } else if (*path_offset_x + ceil(path_width / 2.0) > frame_width_) {
+    *path_offset_x = frame_width_ - ceil(path_width / 2.0);
+    MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
+  }
+
+  return absl::OkStatus();
+}
+
+absl::Status ContentZoomingCalculator::GetDetectionsBox(
+    mediapipe::CalculatorContext* cc, float* xmin, float* xmax, float* ymin,
+    float* ymax, bool* only_required_found, bool* has_detections) {
+  if (cc->Inputs().HasTag(kSalientRegions)) {
+    auto detection_set = cc->Inputs().Tag(kSalientRegions).Get<DetectionSet>();
+    for (const auto& region : detection_set.detections()) {
+      if (!region.only_required()) {
+        continue;
+      }
+      *only_required_found = true;
+      MP_RETURN_IF_ERROR(UpdateRanges(
+          region, options_.detection_shift_vertical(),
+          options_.detection_shift_horizontal(), xmin, xmax, ymin, ymax));
+    }
+  }
+
+  if (cc->Inputs().HasTag(kDetections)) {
+    if (cc->Inputs().Tag(kDetections).IsEmpty()) {
+      if (last_only_required_detection_ == 0) {
+        // If no detections are available and we never had any,
+        // simply return the full-image rectangle as crop-rect.
+        if (cc->Outputs().HasTag(kCropRect)) {
+          auto default_rect = absl::make_unique<mediapipe::Rect>();
+          default_rect->set_x_center(frame_width_ / 2);
+          default_rect->set_y_center(frame_height_ / 2);
+          default_rect->set_width(frame_width_);
+          default_rect->set_height(frame_height_);
+          cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
+                                           Timestamp(cc->InputTimestamp()));
+        }
+        if (cc->Outputs().HasTag(kNormalizedCropRect)) {
+          auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
+          default_rect->set_x_center(0.5);
+          default_rect->set_y_center(0.5);
+          default_rect->set_width(1.0);
+          default_rect->set_height(1.0);
+          cc->Outputs()
+              .Tag(kNormalizedCropRect)
+              .Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
+        }
+        // Also provide a first crop rect: in this case a zero-sized one.
+        if (cc->Outputs().HasTag(kFirstCropRect)) {
+          cc->Outputs()
+              .Tag(kFirstCropRect)
+              .Add(new mediapipe::NormalizedRect(),
+                   Timestamp(cc->InputTimestamp()));
+        }
+        *has_detections = false;
+        return absl::OkStatus();
+      }
+    } else {
+      auto raw_detections = cc->Inputs()
+                                .Tag(kDetections)
+                                .Get<std::vector<mediapipe::Detection>>();
+      for (const auto& detection : raw_detections) {
+        *only_required_found = true;
+        MP_RETURN_IF_ERROR(UpdateRanges(
+            detection, options_.detection_shift_vertical(),
+            options_.detection_shift_horizontal(), xmin, xmax, ymin, ymax));
+      }
+    }
+  }
+  return absl::OkStatus();
+}
+
 }  // namespace autoflip
 }  // namespace mediapipe
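The clamping at the end of the new SmoothAndClampPath keeps the crop-window center at least half the window size away from each frame edge. Below is a standalone sketch of just that rule; ClampCenter is a local illustration for this page, not a MediaPipe function.

```cpp
// Standalone sketch of the edge-clamping rule in SmoothAndClampPath above.
#include <cmath>

float ClampCenter(float center, float window, float frame) {
  const float half = std::ceil(window / 2.0f);
  if (center - half < 0) return half;              // window would spill past 0
  if (center + half > frame) return frame - half;  // window would spill past far edge
  return center;
}
// e.g. frame = 1920, window = 960: centers below 480 snap to 480 and
// centers above 1440 snap to 1440, so the crop box stays inside the image.
```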
@@ -19,7 +19,7 @@ package mediapipe.autoflip;
 import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto";
 import "mediapipe/framework/calculator.proto";
 
-// NextTag: 18
+// NextTag: 19
 message ContentZoomingCalculatorOptions {
   extend mediapipe.CalculatorOptions {
     optional ContentZoomingCalculatorOptions ext = 313091992;

@@ -71,6 +71,12 @@ message ContentZoomingCalculatorOptions {
   // us_to_first_rect time budget.
   optional int64 us_to_first_rect_delay = 16 [default = 0];
 
+  // When true, this flag disables animating camera motions,
+  // and cuts directly to final target position.
+  // Does not apply to the first instance (first detection will still animate).
+  // Use "ANIMATE_ZOOM" input stream to control the first animation.
+  optional bool disable_animations = 18;
+
   // Deprecated parameters
   optional KinematicOptions kinematic_options = 2 [deprecated = true];
   optional int64 min_motion_to_reframe = 4 [deprecated = true];
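For context, a minimal sketch of how the new disable_animations field could be set from C++ through the generated options extension. The header path and stream names are assumptions for illustration; the extension id and option name come from the proto above.

```cpp
// Hedged sketch: building a ContentZoomingCalculator node with the new flag.
// Header path assumed from the calculator's location; stream names illustrative.
#include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"

mediapipe::CalculatorGraphConfig::Node MakeZoomNode() {
  mediapipe::CalculatorGraphConfig::Node node;
  node.set_calculator("ContentZoomingCalculator");
  node.add_input_stream("DETECTIONS:detections");  // kDetections tag
  node.add_output_stream("CROP_RECT:crop_rect");   // kCropRect tag
  auto* opts = node.mutable_options()->MutableExtension(
      mediapipe::autoflip::ContentZoomingCalculatorOptions::ext);
  opts->set_disable_animations(true);  // cut straight to the target position
  return node;
}
```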
@@ -56,7 +56,7 @@ constexpr char kRegionsTag[] = "REGIONS";
 constexpr char kDetectionsTag[] = "DETECTIONS";
 
 // Converts an object detection to a autoflip SignalType. Returns true if the
-// std::string label has a autoflip label.
+// string label has a autoflip label.
 bool MatchType(const std::string& label, SignalType* type) {
   if (label == "person") {
     type->set_standard(SignalType::HUMAN);
@@ -182,7 +182,7 @@ namespace {
 absl::Status ParseAspectRatioString(const std::string& aspect_ratio_string,
                                     double* aspect_ratio) {
   std::string error_msg =
-      "Aspect ratio std::string must be in the format of 'width:height', e.g. "
+      "Aspect ratio string must be in the format of 'width:height', e.g. "
       "'1:1' or '5:4', your input was " +
       aspect_ratio_string;
   auto pos = aspect_ratio_string.find(':');
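The error message documents the expected "width:height" format. A self-contained sketch of that kind of parse follows; it is simplified (bool instead of absl::Status, and std::stod will throw on non-numeric input), so it only mirrors the shape of the real function.

```cpp
// Simplified sketch of 'width:height' parsing as described by the message above.
#include <string>

bool ParseAspect(const std::string& s, double* aspect_ratio) {
  const auto pos = s.find(':');
  if (pos == std::string::npos || pos == 0 || pos + 1 >= s.size()) return false;
  const double width = std::stod(s.substr(0, pos));    // throws on bad input
  const double height = std::stod(s.substr(pos + 1));  // fine for a sketch
  if (height <= 0) return false;
  *aspect_ratio = width / height;
  return true;  // e.g. "5:4" -> 1.25
}
```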
@@ -4,6 +4,7 @@ constexpr float kMinVelocity = 0.5;
 
 namespace mediapipe {
 namespace autoflip {
 
+namespace {
 int Median(const std::deque<std::pair<uint64, int>>& positions_raw) {
   std::deque<int> positions;
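Median() above (now wrapped in an anonymous namespace) reduces the timestamped position samples to their median. A hedged reconstruction of that idea, using the `return positions[n]` selection visible in the next hunk; the nth_element call is an assumption about the elided middle of the function.

```cpp
// Hedged reconstruction of the Median() helper: drop the timestamps, then
// select the middle position. std::deque iterators are random access, so
// nth_element applies directly.
#include <algorithm>
#include <cstddef>
#include <deque>
#include <utility>

using uint64 = unsigned long long;  // stand-in for MediaPipe's uint64 typedef

int MedianPosition(const std::deque<std::pair<uint64, int>>& positions_raw) {
  std::deque<int> positions;
  for (const auto& sample : positions_raw) positions.push_back(sample.second);
  const std::size_t n = positions.size() / 2;
  std::nth_element(positions.begin(), positions.begin() + n, positions.end());
  return positions[n];
}
```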
@@ -16,6 +17,7 @@ int Median(const std::deque<std::pair<uint64, int>>& positions_raw) {
   return positions[n];
 }
+}  // namespace
 
 bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) {
   if (options_.has_min_motion_to_reframe()) {
     return abs(delta_degs) < options_.min_motion_to_reframe();

@@ -25,7 +27,9 @@ bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) {
     return abs(delta_degs) < options_.min_motion_to_reframe_lower();
   }
 }
 
+void KinematicPathSolver::ClearHistory() { raw_positions_at_time_.clear(); }
+
 absl::Status KinematicPathSolver::PredictMotionState(int position,
                                                      const uint64 time_us,
                                                      bool* state) {

@@ -48,6 +52,9 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
   }
 
   int filtered_position = Median(raw_positions_at_time_copy);
+  filtered_position =
+      std::clamp(filtered_position, min_location_, max_location_);
+
   double delta_degs =
       (filtered_position - current_position_px_) / pixels_per_degree_;
 
@@ -59,6 +66,9 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
     // If the motion is smaller than the reframe_window and camera is moving,
     // don't use the update.
     *state = false;
+  } else if (prior_position_px_ == current_position_px_ && motion_state_) {
+    // Camera isn't actually moving. Likely face is past bounds.
+    *state = false;
   } else {
     // Apply new position, plus the reframe window size.
     *state = true;

@@ -66,6 +76,7 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
 
   return absl::OkStatus();
 }
+
 absl::Status KinematicPathSolver::AddObservation(int position,
                                                  const uint64 time_us) {
   if (!initialized_) {

@@ -181,18 +192,22 @@ absl::Status KinematicPathSolver::AddObservation(int position,
   }
 
   // Time and position updates.
-  double delta_t = (time_us - current_time_) / 1000000.0;
+  double delta_t_sec = (time_us - current_time_) / 1000000.0;
+  if (options_.max_delta_time_sec() > 0) {
+    // If updates are very infrequent, then limit the max time difference.
+    delta_t_sec = fmin(delta_t_sec, options_.max_delta_time_sec());
+  }
   // Time since last state/prediction update, smoothed by
   // mean_period_update_rate.
   if (mean_delta_t_ < 0) {
-    mean_delta_t_ = delta_t;
+    mean_delta_t_ = delta_t_sec;
   } else {
     mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
-                    delta_t * options_.mean_period_update_rate();
+                    delta_t_sec * options_.mean_period_update_rate();
   }
 
-  // Observed velocity and then weighted update of this velocity.
-  double observed_velocity = delta_degs / delta_t;
+  // Observed velocity and then weighted update of this velocity (deg/sec).
+  double observed_velocity = delta_degs / delta_t_sec;
   double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(),
                                 options_.max_update_rate());
   double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +

@@ -253,7 +268,8 @@ absl::Status KinematicPathSolver::GetDeltaState(float* delta_position) {
 
 absl::Status KinematicPathSolver::SetState(const float position) {
   RET_CHECK(initialized_) << "SetState called before first observation added.";
-  current_position_px_ = position;
+  current_position_px_ = std::clamp(position, static_cast<float>(min_location_),
+                                    static_cast<float>(max_location_));
   return absl::OkStatus();
 }
 
@@ -71,6 +71,8 @@ class KinematicPathSolver {
   // Provides the change in position from last state.
   absl::Status GetDeltaState(float* delta_position);
 
+  bool IsInitialized() { return initialized_; }
+
  private:
   // Tuning options.
   KinematicOptions options_;
@@ -31,6 +31,9 @@ message KinematicOptions {
   optional int64 filtering_time_window_us = 7 [default = 0];
   // Weighted update of average period, used for motion updates.
   optional float mean_period_update_rate = 8 [default = 0.25];
+  // When set, caps the maximum time difference (seconds) calculated between new
+  // updates/observations. Useful when updates come very infrequently.
+  optional double max_delta_time_sec = 13;
   // Scale factor for max velocity, to be multiplied by the distance from center
   // in degrees. Cannot be used with max_velocity and must be used with
   // max_velocity_shift.
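A minimal sketch of how max_delta_time_sec interacts with the time delta in AddObservation above; the semantics are taken from that hunk (an unset or non-positive cap leaves the delta unchanged), while the function name here is local to the example.

```cpp
// Minimal sketch of the max_delta_time_sec cap from AddObservation above.
#include <algorithm>
#include <cstdint>

double CappedDeltaTSec(uint64_t time_us, uint64_t prev_time_us,
                       double max_delta_time_sec) {
  double delta_t_sec = (time_us - prev_time_us) / 1000000.0;
  if (max_delta_time_sec > 0) {
    delta_t_sec = std::min(delta_t_sec, max_delta_time_sec);
  }
  return delta_t_sec;  // e.g. a 5 s gap with a 0.5 s cap yields 0.5
}
```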
@@ -419,6 +419,13 @@ TEST(KinematicPathSolverTest, PassSetPosition) {
   MP_ASSERT_OK(solver.SetState(400));
   MP_ASSERT_OK(solver.GetState(&state));
   EXPECT_FLOAT_EQ(state, 400);
+  // Expect to stay in bounds.
+  MP_ASSERT_OK(solver.SetState(600));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_FLOAT_EQ(state, 500);
+  MP_ASSERT_OK(solver.SetState(-100));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_FLOAT_EQ(state, 0);
 }
 TEST(KinematicPathSolverTest, PassBorderTest) {
   KinematicOptions options;
@@ -83,7 +83,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem(
     const double in, const double out, Problem* problem, double* a, double* b,
     double* c, double* d, double* k) {
   // Creating a cost function, with 1D residual and 5 1D parameter blocks. This
-  // is what the "1, 1, 1, 1, 1, 1" std::string below means.
+  // is what the "1, 1, 1, 1, 1, 1" string below means.
   CostFunction* cost_function =
       new AutoDiffCostFunction<PolynomialResidual, 1, 1, 1, 1, 1, 1>(
           new PolynomialResidual(in, out));
@@ -55,7 +55,8 @@ class SceneCameraMotionAnalyzer {
           scene_camera_motion_analyzer_options)
       : options_(scene_camera_motion_analyzer_options),
         time_since_last_salient_region_us_(0),
-        has_solid_color_background_(false) {}
+        has_solid_color_background_(false),
+        total_scene_frames_(0) {}
 
   ~SceneCameraMotionAnalyzer() {}
 
@@ -44,7 +44,7 @@ absl::Status PrintHelloWorld() {
   ASSIGN_OR_RETURN(OutputStreamPoller poller,
                    graph.AddOutputStreamPoller("out"));
   MP_RETURN_IF_ERROR(graph.StartRun({}));
-  // Give 10 input packets that contains the same std::string "Hello World!".
+  // Give 10 input packets that contains the same string "Hello World!".
   for (int i = 0; i < 10; ++i) {
     MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
         "in", MakePacket<std::string>("Hello World!").At(Timestamp(i))));

@@ -52,7 +52,7 @@ absl::Status PrintHelloWorld() {
   // Close the input stream "in".
   MP_RETURN_IF_ERROR(graph.CloseInputStream("in"));
   mediapipe::Packet packet;
-  // Get the output packets std::string.
+  // Get the output packets string.
   while (poller.Next(&packet)) {
     LOG(INFO) << packet.Get<std::string>();
   }
@@ -72,6 +72,7 @@ objc_library(
         "//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
     ],
+    features = ["-layering_check"],
     sdk_frameworks = [
         "AVFoundation",
         "CoreGraphics",

@@ -58,6 +58,7 @@ objc_library(
         "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
         "//mediapipe/modules/face_landmark:face_landmark.tflite",
         "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
+        "//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
         "//mediapipe/modules/hand_landmark:handedness.txt",
         "//mediapipe/modules/holistic_landmark:hand_recrop.tflite",
         "//mediapipe/modules/pose_detection:pose_detection.tflite",
@@ -150,6 +150,13 @@ mediapipe_proto_library(
     deps = ["//mediapipe/framework:mediapipe_options_proto"],
 )
 
+config_setting(
+    name = "android_no_jni",
+    define_values = {"MEDIAPIPE_NO_JNI": "1"},
+    values = {"crosstool_top": "//external:android/crosstool"},
+    visibility = ["//visibility:public"],
+)
+
 cc_library(
     name = "calculator_base",
     srcs = ["calculator_base.cc"],

@@ -712,6 +719,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/synchronization",
     ],
 )
 
@@ -916,15 +924,19 @@ cc_library(
         ":packet",
         ":packet_set",
         ":type_map",
         "//mediapipe/framework/deps:no_destructor",
         "//mediapipe/framework/port:logging",
         "//mediapipe/framework/port:map_util",
         "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:source_location",
         "//mediapipe/framework/port:status",
         "//mediapipe/framework/tool:status_util",
         "//mediapipe/framework/tool:type_util",
         "//mediapipe/framework/tool:validate_name",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:span",
         "@com_google_absl//absl/types:variant",
     ],
 )
 
@@ -134,6 +134,7 @@ cc_test(
     deps = [
         ":packet",
         "//mediapipe/framework/port:gtest_main",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
 )
 
@@ -313,8 +313,8 @@ template <class Calc>
 class Node : public NodeBase {
  public:
   Node() : NodeBase(Calc::kCalculatorName) {}
-  // Overrides the built-in calculator type std::string with the provided
-  // argument. Can be used to create nodes from pure interfaces.
+  // Overrides the built-in calculator type string with the provided argument.
+  // Can be used to create nodes from pure interfaces.
   // TODO: only use this for pure interfaces
   Node(const std::string& type_override) : NodeBase(type_override) {}
 
@@ -377,6 +377,29 @@ class PacketGenerator {
     return *options_.MutableExtension(T::ext);
   }
 
+  template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
+  auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
+    using PayloadT =
+        typename PortCommon<B, T, kIsOptional, kIsMultiple>::PayloadT;
+    if constexpr (std::is_same_v<B, SideOutputBase>) {
+      auto* base = &out_sides_[port.Tag()];
+      if constexpr (kIsMultiple) {
+        return MultiSideSource<PayloadT>(base);
+      } else {
+        return SideSource<PayloadT>(base);
+      }
+    } else if constexpr (std::is_same_v<B, SideInputBase>) {
+      auto* base = &in_sides_[port.Tag()];
+      if constexpr (kIsMultiple) {
+        return MultiSideDestination<PayloadT>(base);
+      } else {
+        return SideDestination<PayloadT>(base);
+      }
+    } else {
+      static_assert(dependent_false<B>::value, "Type not supported.");
+    }
+  }
+
  private:
   std::string type_;
   TagIndexMap<DestinationBase> in_sides_;
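The new operator[] relies on C++17 `if constexpr` plus a dependent-false static_assert so that each port kind yields a differently typed handle while unsupported tags fail at compile time. A self-contained illustration of that dispatch pattern follows; InputTag, OutputTag, InHandle, and OutHandle are stand-ins, not MediaPipe types.

```cpp
// Self-contained illustration of the `if constexpr` dispatch used above.
#include <type_traits>

template <class>
struct dependent_false : std::false_type {};

struct InputTag {};
struct OutputTag {};
struct InHandle {};
struct OutHandle {};

template <class Tag>
auto MakeHandle() {
  if constexpr (std::is_same_v<Tag, InputTag>) {
    return InHandle{};   // MakeHandle<InputTag>() deduces InHandle
  } else if constexpr (std::is_same_v<Tag, OutputTag>) {
    return OutHandle{};  // a different static type for output ports
  } else {
    // Dependent condition keeps the assert from firing until instantiation
    // with an unsupported tag, mirroring "Type not supported." above.
    static_assert(dependent_false<Tag>::value, "Type not supported.");
  }
}
```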
@@ -402,7 +425,7 @@ class Graph {
   }
 
   // Creates a node of a specific type. Should be used for pure interfaces,
-  // which do not have a built-in type std::string.
+  // which do not have a built-in type string.
   template <class Calc>
   Node<Calc>& AddNode(const std::string& type) {
     auto node = std::make_unique<Node<Calc>>(type);
@@ -6,8 +6,8 @@
 namespace mediapipe {
 namespace api2 {
 
-// This class stores a constant std::string that can be inspected at compile
-// time in constexpr code.
+// This class stores a constant string that can be inspected at compile time
+// in constexpr code.
 class const_str {
  public:
   constexpr const_str(std::size_t size, const char* data)
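To make that comment concrete, here is a minimal analogue (not MediaPipe's actual const_str, whose accessors are not shown in this diff) of a string whose size and characters remain available to constexpr evaluation.

```cpp
// Minimal analogue of the idea behind const_str: capture the size and data
// pointer of a literal so constexpr code can inspect both.
#include <cstddef>

class ConstStr {
 public:
  constexpr ConstStr(std::size_t size, const char* data)
      : size_(size), data_(data) {}
  constexpr std::size_t size() const { return size_; }
  constexpr char operator[](std::size_t i) const { return data_[i]; }

 private:
  std::size_t size_;
  const char* data_;
};

static_assert(ConstStr(5, "hello").size() == 5);   // evaluated at compile time
static_assert(ConstStr(5, "hello")[1] == 'e');
```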
Some files were not shown because too many files have changed in this diff.