Project import generated by Copybara.

GitOrigin-RevId: 73d686c40057684f8bfaca285368bf1813f9fc26
MediaPipe Team 2022-03-21 12:07:37 -07:00 committed by jqtang
parent e6c19885c6
commit cc6a2f7af6
266 changed files with 3658 additions and 1681 deletions

View File

@ -1 +1 @@
4.2.1
5.0.0

View File

@ -10,5 +10,3 @@ For questions on how to work with MediaPipe, or support for problems that are no
If you are reporting a vulnerability, please use the [dedicated reporting process](https://github.com/google/mediapipe/security).
For high-level discussions about MediaPipe, please post to discuss@mediapipe.org, for questions about the development or internal workings of MediaPipe, or if you would like to know how to contribute to MediaPipe, please post to developers@mediapipe.org.

View File

@ -56,7 +56,7 @@ RUN pip3 install tf_slim
RUN ln -s /usr/bin/python3 /usr/bin/python
# Install bazel
ARG BAZEL_VERSION=4.2.1
ARG BAZEL_VERSION=5.0.0
RUN mkdir /bazel && \
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \

View File

@ -136,8 +136,8 @@ run code search using
## Community
* [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome
MediaPipe related frameworks, libraries and software
* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
curated list of awesome MediaPipe related frameworks, libraries and software
* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
community discussion around MediaPipe

View File

@ -61,11 +61,12 @@ http_archive(
sha256 = "de682ea824bfffba05b4e33b67431c247397d6175962534305136aa06f92e049",
)
# Google Benchmark library.
# Google Benchmark library v1.6.1 released on 2022-01-10.
http_archive(
name = "com_google_benchmark",
urls = ["https://github.com/google/benchmark/archive/main.zip"],
strip_prefix = "benchmark-main",
urls = ["https://github.com/google/benchmark/archive/refs/tags/v1.6.1.tar.gz"],
strip_prefix = "benchmark-1.6.1",
sha256 = "6132883bc8c9b0df5375b16ab520fac1a85dc9e4cf5be59480448ece74b278d4",
build_file = "@//third_party:benchmark.BUILD",
)
@ -373,9 +374,9 @@ http_archive(
)
# Tensorflow repo should always go after the other external dependencies.
# 2021-12-02
_TENSORFLOW_GIT_COMMIT = "18a1dc0ba806dc023808531f0373d9ec068e64bf"
_TENSORFLOW_SHA256 = "85b90416f7a11339327777bccd634de00ca0de2cf334f5f0727edcb11ff9289a"
# 2022-02-15
_TENSORFLOW_GIT_COMMIT = "a3419acc751dfc19caf4d34a1594e1f76810ec58"
_TENSORFLOW_SHA256 = "b95b2a83632d4055742ae1a2dcc96b45da6c12a339462dbc76c8bca505308e3a"
http_archive(
name = "org_tensorflow",
urls = [
@ -383,7 +384,6 @@ http_archive(
],
patches = [
"@//third_party:org_tensorflow_compatibility_fixes.diff",
"@//third_party:org_tensorflow_objc_cxx17.diff",
# Diff is generated with a script, don't update it manually.
"@//third_party:org_tensorflow_custom_ops.diff",
],

View File

@ -109,7 +109,7 @@ for app in ${apps}; do
if [[ ${category} != "shoe" ]]; then
bazel_flags_extended+=(--define ${category}=true)
fi
bazel "${bazel_flags_extended[@]}"
bazelisk "${bazel_flags_extended[@]}"
cp -f "${bin}" "${apk}"
fi
apks+=(${apk})
@ -120,7 +120,7 @@ for app in ${apps}; do
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_4
fi
bazel "${bazel_flags[@]}"
bazelisk "${bazel_flags[@]}"
cp -f "${bin}" "${apk}"
if [[ ${app_name} == "templatematchingcpu" ]]; then
switch_to_opencv_3

View File

@ -83,7 +83,7 @@ for app in ${apps}; do
bazel_flags=("${default_bazel_flags[@]}")
bazel_flags+=(${target})
bazel "${bazel_flags[@]}"
bazelisk "${bazel_flags[@]}"
cp -f "${bin_dir}/${app}/"*"_cpu" "${out_dir}"
fi
if [[ $build_only == false ]]; then

View File

@ -71,7 +71,7 @@ for app in ${apps}; do
bazel_flags+=(--linkopt=-s)
fi
bazel "${bazel_flags[@]}"
bazelisk "${bazel_flags[@]}"
cp -f "${bin_dir}/${app}/"*".ipa" "${out_dir}"
fi
done

View File

@ -169,7 +169,7 @@ behavior depending on resource constraints.
[`CalculatorBase`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator_base.h
[`DefaultInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/default_input_stream_handler.h
[`SyncSetInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/sync_set_input_stream_handler.h
[`ImmediateInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/immediate_input_stream_handler.h
[`SyncSetInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc
[`ImmediateInputStreamHandler`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc
[`CalculatorGraphConfig::max_queue_size`]: https://github.com/google/mediapipe/tree/master/mediapipe/framework/calculator.proto
[`FlowLimiterCalculator`]: https://github.com/google/mediapipe/tree/master/mediapipe/calculators/core/flow_limiter_calculator.cc

View File

@ -30,7 +30,7 @@ APIs (currently in alpha) that are now available in
* Install MediaPipe following these [instructions](./install.md).
* Setup Java Runtime.
* Setup Android SDK release 30.0.0 and above.
* Setup Android NDK version 18 and above.
* Setup Android NDK version between 18 and 21.
MediaPipe recommends setting up Android SDK and NDK via Android Studio (and see
below for Android Studio setup). However, if you prefer using MediaPipe without

View File

@ -48,6 +48,16 @@ each project.
bazel build -c opt --strip=ALWAYS \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--fat_apk_cpu=arm64-v8a,armeabi-v7a \
--legacy_whole_archive=0 \
--features=-legacy_whole_archive \
--copt=-fvisibility=hidden \
--copt=-ffunction-sections \
--copt=-fdata-sections \
--copt=-fstack-protector \
--copt=-Oz \
--copt=-fomit-frame-pointer \
--copt=-DABSL_MIN_LOG_LEVEL=2 \
--linkopt=-Wl,--gc-sections,--strip-all \
//path/to/the/aar/build/file:aar_name.aar
```
@ -57,6 +67,16 @@ each project.
bazel build -c opt --strip=ALWAYS \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--fat_apk_cpu=arm64-v8a,armeabi-v7a \
--legacy_whole_archive=0 \
--features=-legacy_whole_archive \
--copt=-fvisibility=hidden \
--copt=-ffunction-sections \
--copt=-fdata-sections \
--copt=-fstack-protector \
--copt=-Oz \
--copt=-fomit-frame-pointer \
--copt=-DABSL_MIN_LOG_LEVEL=2 \
--linkopt=-Wl,--gc-sections,--strip-all \
//mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mediapipe_face_detection.aar
# It should print:

View File

@ -569,7 +569,7 @@ next section.
Option 1. Follow
[the official Bazel documentation](https://docs.bazel.build/versions/master/install-windows.html)
to install Bazel 4.2.1 or higher.
to install Bazel 5.0.0 or higher.
Option 2. Follow the official
[Bazel documentation](https://docs.bazel.build/versions/master/install-bazelisk.html)

View File

@ -126,6 +126,7 @@ following steps:
}
return packet.Get<MyType>();
});
}
} // namespace mediapipe
```

View File

@ -136,8 +136,8 @@ run code search using
## Community
* [Awesome MediaPipe](https://mediapipe.org) - A curated list of awesome
MediaPipe related frameworks, libraries and software
* [Awesome MediaPipe](https://mediapipe.page.link/awesome-mediapipe) - A
curated list of awesome MediaPipe related frameworks, libraries and software
* [Slack community](https://mediapipe.page.link/joinslack) for MediaPipe users
* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General
community discussion around MediaPipe

View File

@ -26,7 +26,7 @@ MediaPipe Face Detection is an ultrafast face detection solution that comes with
face detector tailored for mobile GPU inference. The detector's super-realtime
performance enables it to be applied to any live viewfinder experience that
requires an accurate facial region of interest as an input for other
task-specific models, such as 3D facial keypoint or geometry estimation (e.g.,
task-specific models, such as 3D facial keypoint estimation (e.g.,
[MediaPipe Face Mesh](./face_mesh.md)), facial features or expression
classification, and face region segmentation. BlazeFace uses a lightweight
feature extraction network inspired by, but distinct from

View File

@ -20,34 +20,34 @@ nav_order: 2
## Overview
MediaPipe Face Mesh is a face geometry solution that estimates 468 3D face
landmarks in real-time even on mobile devices. It employs machine learning (ML)
to infer the 3D surface geometry, requiring only a single camera input without
the need for a dedicated depth sensor. Utilizing lightweight model architectures
together with GPU acceleration throughout the pipeline, the solution delivers
real-time performance critical for live experiences.
MediaPipe Face Mesh is a solution that estimates 468 3D face landmarks in
real-time even on mobile devices. It employs machine learning (ML) to infer the
3D facial surface, requiring only a single camera input without the need for a
dedicated depth sensor. Utilizing lightweight model architectures together with
GPU acceleration throughout the pipeline, the solution delivers real-time
performance critical for live experiences.
Additionally, the solution is bundled with the Face Geometry module that bridges
the gap between the face landmark estimation and useful real-time augmented
reality (AR) applications. It establishes a metric 3D space and uses the face
landmark screen positions to estimate face geometry within that space. The face
geometry data consists of common 3D geometry primitives, including a face pose
transformation matrix and a triangular face mesh. Under the hood, a lightweight
statistical analysis method called
Additionally, the solution is bundled with the Face Transform module that
bridges the gap between the face landmark estimation and useful real-time
augmented reality (AR) applications. It establishes a metric 3D space and uses
the face landmark screen positions to estimate a face transform within that
space. The face transform data consists of common 3D primitives, including a
face pose transformation matrix and a triangular face mesh. Under the hood, a
lightweight statistical analysis method called
[Procrustes Analysis](https://en.wikipedia.org/wiki/Procrustes_analysis) is
employed to drive a robust, performant and portable logic. The analysis runs on
CPU and has a minimal speed/memory footprint on top of the ML model inference.
![face_mesh_ar_effects.gif](../images/face_mesh_ar_effects.gif) |
:-------------------------------------------------------------: |
*Fig 1. AR effects utilizing facial surface geometry.* |
*Fig 1. AR effects utilizing the 3D facial surface.* |
## ML Pipeline
Our ML pipeline consists of two real-time deep neural network models that work
together: A detector that operates on the full image and computes face locations
and a 3D face landmark model that operates on those locations and predicts the
approximate surface geometry via regression. Having the face accurately cropped
approximate 3D surface via regression. Having the face accurately cropped
drastically reduces the need for common data augmentations like affine
transformations consisting of rotations, translation and scale changes. Instead
it allows the network to dedicate most of its capacity towards coordinate
@ -55,8 +55,8 @@ prediction accuracy. In addition, in our pipeline the crops can also be
generated based on the face landmarks identified in the previous frame, and only
when the landmark model could no longer identify face presence is the face
detector invoked to relocalize the face. This strategy is similar to that
employed in our [MediaPipe Hands](./hands.md) solution, which uses a palm detector
together with a hand landmark model.
employed in our [MediaPipe Hands](./hands.md) solution, which uses a palm
detector together with a hand landmark model.
The pipeline is implemented as a MediaPipe
[graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt)
@ -128,7 +128,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962).
:---------------------------------------------------------------------------: |
*Fig 3. Attention Mesh: Overview of model architecture.* |
## Face Geometry Module
## Face Transform Module
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
detection in the screen coordinate space: the X- and Y- coordinates are
@ -140,7 +140,7 @@ enable the full spectrum of augmented reality (AR) features like aligning a
virtual 3D object with a detected face.
The
[Face Geometry module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
[Face Transform module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry)
moves away from the screen coordinate space towards a metric 3D space and
provides necessary primitives to handle a detected face as a regular 3D object.
By design, you'll be able to use a perspective camera to project the final 3D
@ -151,7 +151,7 @@ landmark positions are not changed.
#### Metric 3D Space
The **Metric 3D space** established within the Face Geometry module is a
The **Metric 3D space** established within the Face Transform module is a
right-handed orthonormal metric 3D coordinate space. Within the space, there is
a **virtual perspective camera** located at the space origin and pointed in the
negative direction of the Z-axis. In the current pipeline, it is assumed that
@ -184,11 +184,11 @@ functions:
### Components
#### Geometry Pipeline
#### Transform Pipeline
The **Geometry Pipeline** is a key component, which is responsible for
estimating face geometry objects within the Metric 3D space. On each frame, the
following steps are executed in the given order:
The **Transform Pipeline** is a key component, which is responsible for
estimating the face transform objects within the Metric 3D space. On each frame,
the following steps are executed in the given order:
- Face landmark screen coordinates are converted into the Metric 3D space
coordinates;
@ -199,12 +199,12 @@ following steps are executed in the given order:
positions (XYZ), while both the vertex texture coordinates (UV) and the
triangular topology are inherited from the canonical face model.
The geometry pipeline is implemented as a MediaPipe
The transform pipeline is implemented as a MediaPipe
[calculator](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc).
For your convenience, the face geometry pipeline calculator is bundled together
with corresponding metadata into a unified MediaPipe
For your convenience, this calculator is bundled together with corresponding
metadata into a unified MediaPipe
[subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt).
The face geometry format is defined as a Protocol Buffer
The face transform format is defined as a Protocol Buffer
[message](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto).
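To make the shape of that data concrete, the sketch below shows one way to consume it from C++. It assumes (based on the description above, not on new API surface) that the subgraph's MULTI_FACE_GEOMETRY output carries a `std::vector` of `FaceGeometry` protos and that mesh vertices are packed in the XYZ + UV layout described earlier.
```
// Minimal consumer sketch; stream contents and vertex packing are assumptions.
#include <vector>

#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"

void LogFaceTransforms(const mediapipe::Packet& packet) {
  const auto& geometries =
      packet.Get<std::vector<mediapipe::face_geometry::FaceGeometry>>();
  for (const auto& geometry : geometries) {
    // The pose transform matrix is a mediapipe::MatrixData (expected 4x4)
    // expressed in the metric 3D space described above.
    const auto& pose = geometry.pose_transform_matrix();
    // Mesh vertices are assumed to be packed as 5 floats each: XYZ + UV.
    const int vertex_count = geometry.mesh().vertex_buffer_size() / 5;
    LOG(INFO) << "Face pose matrix: " << pose.rows() << "x" << pose.cols()
              << ", mesh vertices: " << vertex_count;
  }
}
```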
#### Effect Renderer
@ -227,7 +227,7 @@ The effect renderer is implemented as a MediaPipe
| ![face_geometry_renderer.gif](../images/face_geometry_renderer.gif) |
| :---------------------------------------------------------------------: |
| *Fig 5. An example of face effects rendered by the Face Geometry Effect Renderer.* |
| *Fig 5. An example of face effects rendered by the Face Transform Effect Renderer.* |
## Solution APIs

View File

@ -116,7 +116,7 @@ on how to build MediaPipe examples.
Note: The following runs TensorFlow inference on CPU. If you would like to
run inference on GPU (Linux only), please follow
[TensorFlow CUDA Support and Setup on Linux Desktop](gpu.md#tensorflow-cuda-support-and-setup-on-linux-desktop)
[TensorFlow CUDA Support and Setup on Linux Desktop](../getting_started/gpu_support.md#tensorflow-cuda-support-and-setup-on-linux-desktop)
instead.
To build the TensorFlow CPU inference example on desktop, run:

View File

@ -384,7 +384,7 @@ Supported configuration options:
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils_3d/control_utils_3d.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/objectron/objectron.js" crossorigin="anonymous"></script>
</head>

View File

@ -359,7 +359,7 @@ Supported configuration options:
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils_3d/control_utils_3d.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/pose/pose.js" crossorigin="anonymous"></script>
</head>

View File

@ -117,6 +117,7 @@ mediapipe_proto_library(
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
"//mediapipe/framework/formats:classification_proto",
"//mediapipe/framework/formats:landmark_proto",
],
)
@ -309,8 +310,8 @@ cc_library(
)
cc_library(
name = "concatenate_normalized_landmark_list_calculator",
srcs = ["concatenate_normalized_landmark_list_calculator.cc"],
name = "concatenate_proto_list_calculator",
srcs = ["concatenate_proto_list_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":concatenate_vector_calculator_cc_proto",
@ -324,10 +325,10 @@ cc_library(
)
cc_test(
name = "concatenate_normalized_landmark_list_calculator_test",
srcs = ["concatenate_normalized_landmark_list_calculator_test.cc"],
name = "concatenate_proto_list_calculator_test",
srcs = ["concatenate_proto_list_calculator_test.cc"],
deps = [
":concatenate_normalized_landmark_list_calculator",
":concatenate_proto_list_calculator",
":concatenate_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
@ -964,8 +965,8 @@ cc_test(
)
cc_library(
name = "split_landmarks_calculator",
srcs = ["split_landmarks_calculator.cc"],
name = "split_proto_list_calculator",
srcs = ["split_proto_list_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":split_vector_calculator_cc_proto",
@ -979,10 +980,10 @@ cc_library(
)
cc_test(
name = "split_landmarks_calculator_test",
srcs = ["split_landmarks_calculator_test.cc"],
name = "split_proto_list_calculator_test",
srcs = ["split_proto_list_calculator_test.cc"],
deps = [
":split_landmarks_calculator",
":split_proto_list_calculator",
":split_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
@ -1195,6 +1196,7 @@ cc_library(
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:collection_item_id",
"//mediapipe/framework/formats:classification_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",

View File

@ -1,79 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace api2 {
// Concatenates several NormalizedLandmarkList protos following stream index
// order. This class assumes that every input stream contains a
// NormalizedLandmarkList proto object.
class ConcatenateNormalizedLandmarkListCalculator : public Node {
public:
static constexpr Input<NormalizedLandmarkList>::Multiple kIn{""};
static constexpr Output<NormalizedLandmarkList> kOut{""};
MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
RET_CHECK_GE(kIn(cc).Count(), 1);
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
only_emit_if_all_present_ =
cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
.only_emit_if_all_present();
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
if (only_emit_if_all_present_) {
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) return absl::OkStatus();
}
}
NormalizedLandmarkList output;
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) continue;
const NormalizedLandmarkList& list = *input;
for (int j = 0; j < list.landmark_size(); ++j) {
*output.add_landmark() = list.landmark(j);
}
}
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
private:
bool only_emit_if_all_present_;
};
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
} // namespace api2
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_NORMALIZED_LIST_CALCULATOR_H_

View File

@ -0,0 +1,118 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/concatenate_vector_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace api2 {
// Concatenate several input packets of ListType with a repeated field of
// ItemType into a single output packet of ListType following stream index
// order.
template <typename ItemType, typename ListType>
class ConcatenateListsCalculator : public Node {
public:
static constexpr typename Input<ListType>::Multiple kIn{""};
static constexpr Output<ListType> kOut{""};
MEDIAPIPE_NODE_CONTRACT(kIn, kOut);
static absl::Status UpdateContract(CalculatorContract* cc) {
RET_CHECK_GE(kIn(cc).Count(), 1);
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
only_emit_if_all_present_ =
cc->Options<::mediapipe::ConcatenateVectorCalculatorOptions>()
.only_emit_if_all_present();
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
if (only_emit_if_all_present_) {
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) return absl::OkStatus();
}
}
ListType output;
for (const auto& input : kIn(cc)) {
if (input.IsEmpty()) continue;
const ListType& list = *input;
for (int j = 0; j < ListSize(list); ++j) {
*AddItem(output) = GetItem(list, j);
}
}
kOut(cc).Send(std::move(output));
return absl::OkStatus();
}
protected:
virtual int ListSize(const ListType& list) const = 0;
virtual const ItemType GetItem(const ListType& list, int idx) const = 0;
virtual ItemType* AddItem(ListType& list) const = 0;
private:
bool only_emit_if_all_present_;
};
// TODO: Move calculators to separate *.cc files
class ConcatenateNormalizedLandmarkListCalculator
: public ConcatenateListsCalculator<NormalizedLandmark,
NormalizedLandmarkList> {
protected:
int ListSize(const NormalizedLandmarkList& list) const override {
return list.landmark_size();
}
const NormalizedLandmark GetItem(const NormalizedLandmarkList& list,
int idx) const override {
return list.landmark(idx);
}
NormalizedLandmark* AddItem(NormalizedLandmarkList& list) const override {
return list.add_landmark();
}
};
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListCalculator);
class ConcatenateLandmarkListCalculator
: public ConcatenateListsCalculator<Landmark, LandmarkList> {
protected:
int ListSize(const LandmarkList& list) const override {
return list.landmark_size();
}
const Landmark GetItem(const LandmarkList& list, int idx) const override {
return list.landmark(idx);
}
Landmark* AddItem(LandmarkList& list) const override {
return list.add_landmark();
}
};
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkListCalculator);
} // namespace api2
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_CONCATENATE_PROTO_LIST_CALCULATOR_H_
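Because the concatenation logic now lives in the ConcatenateListsCalculator template, supporting another repeated-field proto only takes the three accessor overrides. The following is a purely hypothetical sketch for ClassificationList, shown to illustrate the pattern; it is not part of this change.
```
// Hypothetical example: a ClassificationList variant built on the
// ConcatenateListsCalculator<ItemType, ListType> template defined above.
#include "mediapipe/framework/formats/classification.pb.h"

namespace mediapipe {
namespace api2 {

class ConcatenateClassificationListCalculator
    : public ConcatenateListsCalculator<Classification, ClassificationList> {
 protected:
  int ListSize(const ClassificationList& list) const override {
    return list.classification_size();
  }
  const Classification GetItem(const ClassificationList& list,
                               int idx) const override {
    return list.classification(idx);
  }
  Classification* AddItem(ClassificationList& list) const override {
    return list.add_classification();
  }
};
MEDIAPIPE_REGISTER_NODE(ConcatenateClassificationListCalculator);

}  // namespace api2
}  // namespace mediapipe
```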

View File

@ -18,6 +18,7 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/collection_item_id.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/ret_check.h"
@ -79,6 +80,8 @@ class ConstantSidePacketCalculator : public CalculatorBase {
packet.Set<uint64>();
} else if (packet_options.has_classification_list_value()) {
packet.Set<ClassificationList>();
} else if (packet_options.has_landmark_list_value()) {
packet.Set<LandmarkList>();
} else {
return absl::InvalidArgumentError(
"None of supported values were specified in options.");
@ -108,6 +111,9 @@ class ConstantSidePacketCalculator : public CalculatorBase {
} else if (packet_options.has_classification_list_value()) {
packet.Set(MakePacket<ClassificationList>(
packet_options.classification_list_value()));
} else if (packet_options.has_landmark_list_value()) {
packet.Set(
MakePacket<LandmarkList>(packet_options.landmark_list_value()));
} else {
return absl::InvalidArgumentError(
"None of supported values were specified in options.");

View File

@ -18,6 +18,7 @@ package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/framework/formats/classification.proto";
import "mediapipe/framework/formats/landmark.proto";
option objc_class_prefix = "MediaPipe";
@ -34,6 +35,7 @@ message ConstantSidePacketCalculatorOptions {
string string_value = 4;
uint64 uint64_value = 5;
ClassificationList classification_list_value = 6;
LandmarkList landmark_list_value = 7;
}
}
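For illustration, a graph could now emit a constant LandmarkList as a side packet. This is a minimal sketch; the side-packet name and landmark values are invented, and the node structure follows the calculator's existing PACKET output convention.
```
// Sketch only: a ConstantSidePacketCalculator node using the new
// landmark_list_value option. Names and values are illustrative.
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig MakeConstantLandmarksConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    node {
      calculator: "ConstantSidePacketCalculator"
      output_side_packet: "PACKET:fixed_landmarks"
      options: {
        [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
          packet { landmark_list_value { landmark { x: 0.5 y: 0.5 z: 0.0 } } }
        }
      }
    }
  )pb");
}
```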

View File

@ -29,6 +29,11 @@ namespace api2 {
// This calculator periodically copies the GraphProfile from
// mediapipe::GraphProfiler::CaptureProfile to the "PROFILE" output stream.
//
// Similarly to the log files saved by GraphProfiler::WriteProfile when trace
// logging is enabled, the first captured profile contains the full
// canonicalized graph config and, if tracing is enabled, calculator names in
// graph traces. Subsequent profiles omit this information.
//
// Example config:
// node {
// calculator: "GraphProfileCalculator"
@ -50,11 +55,14 @@ class GraphProfileCalculator : public Node {
absl::Status Process(CalculatorContext* cc) final {
auto options = cc->Options<::mediapipe::GraphProfileCalculatorOptions>();
if (prev_profile_ts_ == Timestamp::Unset() ||
bool first_profile = prev_profile_ts_ == Timestamp::Unset();
if (first_profile ||
cc->InputTimestamp() - prev_profile_ts_ >= options.profile_interval()) {
prev_profile_ts_ = cc->InputTimestamp();
GraphProfile result;
MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(&result));
MP_RETURN_IF_ERROR(cc->GetProfilingContext()->CaptureProfile(
&result, first_profile ? PopulateGraphConfig::kFull
: PopulateGraphConfig::kNo));
kProfileOut(cc).Send(result);
}
return absl::OkStatus();

View File

@ -202,6 +202,8 @@ TEST_F(GraphProfileCalculatorTest, GraphProfile) {
}
})pb");
ASSERT_EQ(output_packets.size(), 2);
EXPECT_TRUE(output_packets[0].Get<GraphProfile>().has_config());
EXPECT_THAT(output_packets[1].Get<GraphProfile>(),
mediapipe::EqualsProto(expected_profile));
}

View File

@ -23,8 +23,8 @@
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/status.h"
// Quantizes a vector of floats to a std::string so that each float becomes a
// byte in the [0, 255] range. Any value above max_quantized_value or below
// Quantizes a vector of floats to a string so that each float becomes a byte
// in the [0, 255] range. Any value above max_quantized_value or below
// min_quantized_value will be saturated to '/xFF' or '/0'.
//
// Example config:

View File

@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_ // NOLINT
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
@ -24,30 +24,30 @@
namespace mediapipe {
// Splits an input packet with LandmarkListType into
// multiple LandmarkListType output packets using the [begin, end) ranges
// Splits an input packet of ListType with a repeated field of ItemType
// into multiple ListType output packets using the [begin, end) ranges
// specified in SplitVectorCalculatorOptions. If the option "element_only" is
// set to true, all ranges should be of size 1 and all outputs will be elements
// of type LandmarkType. If "element_only" is false, ranges can be
// non-zero in size and all outputs will be of type LandmarkListType.
// of type ItemType. If "element_only" is false, ranges can be
// non-zero in size and all outputs will be of type ListType.
// If the option "combine_outputs" is set to true, only one output stream can be
// specified and all ranges of elements will be combined into one
// LandmarkListType.
template <typename LandmarkType, typename LandmarkListType>
class SplitLandmarksCalculator : public CalculatorBase {
// ListType.
template <typename ItemType, typename ListType>
class SplitListsCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().NumEntries() == 1);
RET_CHECK(cc->Outputs().NumEntries() != 0);
cc->Inputs().Index(0).Set<LandmarkListType>();
cc->Inputs().Index(0).Set<ListType>();
const auto& options =
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
if (options.combine_outputs()) {
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
cc->Outputs().Index(0).Set<LandmarkListType>();
cc->Outputs().Index(0).Set<ListType>();
for (int i = 0; i < options.ranges_size() - 1; ++i) {
for (int j = i + 1; j < options.ranges_size(); ++j) {
const auto& range_0 = options.ranges(i);
@ -82,9 +82,9 @@ class SplitLandmarksCalculator : public CalculatorBase {
return absl::InvalidArgumentError(
"Since element_only is true, all ranges should be of size 1.");
}
cc->Outputs().Index(i).Set<LandmarkType>();
cc->Outputs().Index(i).Set<ItemType>();
} else {
cc->Outputs().Index(i).Set<LandmarkListType>();
cc->Outputs().Index(i).Set<ListType>();
}
}
}
@ -111,39 +111,38 @@ class SplitLandmarksCalculator : public CalculatorBase {
}
absl::Status Process(CalculatorContext* cc) override {
const LandmarkListType& input =
cc->Inputs().Index(0).Get<LandmarkListType>();
RET_CHECK_GE(input.landmark_size(), max_range_end_)
<< "Max range end " << max_range_end_ << " exceeds landmarks size "
<< input.landmark_size();
const ListType& input = cc->Inputs().Index(0).Get<ListType>();
RET_CHECK_GE(ListSize(input), max_range_end_)
<< "Max range end " << max_range_end_ << " exceeds list size "
<< ListSize(input);
if (combine_outputs_) {
LandmarkListType output;
ListType output;
for (int i = 0; i < ranges_.size(); ++i) {
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
const LandmarkType& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
const ItemType& input_item = GetItem(input, j);
*AddItem(output) = input_item;
}
}
RET_CHECK_EQ(output.landmark_size(), total_elements_);
RET_CHECK_EQ(ListSize(output), total_elements_);
cc->Outputs().Index(0).AddPacket(
MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
MakePacket<ListType>(output).At(cc->InputTimestamp()));
} else {
if (element_only_) {
for (int i = 0; i < ranges_.size(); ++i) {
cc->Outputs().Index(i).AddPacket(
MakePacket<LandmarkType>(input.landmark(ranges_[i].first))
MakePacket<ItemType>(GetItem(input, ranges_[i].first))
.At(cc->InputTimestamp()));
}
} else {
for (int i = 0; i < ranges_.size(); ++i) {
LandmarkListType output;
ListType output;
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
const LandmarkType& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
const ItemType& input_item = GetItem(input, j);
*AddItem(output) = input_item;
}
cc->Outputs().Index(i).AddPacket(
MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
MakePacket<ListType>(output).At(cc->InputTimestamp()));
}
}
}
@ -151,6 +150,11 @@ class SplitLandmarksCalculator : public CalculatorBase {
return absl::OkStatus();
}
protected:
virtual int ListSize(const ListType& list) const = 0;
virtual const ItemType GetItem(const ListType& list, int idx) const = 0;
virtual ItemType* AddItem(ListType& list) const = 0;
private:
std::vector<std::pair<int32, int32>> ranges_;
int32 max_range_end_ = -1;
@ -159,15 +163,40 @@ class SplitLandmarksCalculator : public CalculatorBase {
bool combine_outputs_ = false;
};
typedef SplitLandmarksCalculator<NormalizedLandmark, NormalizedLandmarkList>
SplitNormalizedLandmarkListCalculator;
// TODO: Move calculators to separate *.cc files
class SplitNormalizedLandmarkListCalculator
: public SplitListsCalculator<NormalizedLandmark, NormalizedLandmarkList> {
protected:
int ListSize(const NormalizedLandmarkList& list) const override {
return list.landmark_size();
}
const NormalizedLandmark GetItem(const NormalizedLandmarkList& list,
int idx) const override {
return list.landmark(idx);
}
NormalizedLandmark* AddItem(NormalizedLandmarkList& list) const override {
return list.add_landmark();
}
};
REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);
typedef SplitLandmarksCalculator<Landmark, LandmarkList>
SplitLandmarkListCalculator;
class SplitLandmarkListCalculator
: public SplitListsCalculator<Landmark, LandmarkList> {
protected:
int ListSize(const LandmarkList& list) const override {
return list.landmark_size();
}
const Landmark GetItem(const LandmarkList& list, int idx) const override {
return list.landmark(idx);
}
Landmark* AddItem(LandmarkList& list) const override {
return list.add_landmark();
}
};
REGISTER_CALCULATOR(SplitLandmarkListCalculator);
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_
#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_PROTO_LIST_CALCULATOR_H_
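As a usage sketch for the templated splitter, the hypothetical node below splits a NormalizedLandmarkList into two smaller lists by index range; the stream names and the ranges are invented for illustration only.
```
// Sketch only: splitting a NormalizedLandmarkList into two sub-lists.
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig MakeSplitLandmarksConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    node {
      calculator: "SplitNormalizedLandmarkListCalculator"
      input_stream: "face_landmarks"
      output_stream: "subset_a_landmarks"
      output_stream: "subset_b_landmarks"
      options: {
        [mediapipe.SplitVectorCalculatorOptions.ext]: {
          ranges: { begin: 0 end: 4 }
          ranges: { begin: 4 end: 8 }
        }
      }
    }
  )pb");
}
```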

View File

@ -24,7 +24,7 @@
namespace mediapipe {
// Calculator that converts a std::string into an integer type, or fails if the
// Calculator that converts a string into an integer type, or fails if the
// conversion is not possible.
//
// Example config:
@ -47,7 +47,7 @@ class StringToIntCalculatorTemplate : public CalculatorBase {
if (!absl::SimpleAtoi(cc->InputSidePackets().Index(0).Get<std::string>(),
&number)) {
return absl::InvalidArgumentError(
"The std::string could not be parsed as an integer.");
"The string could not be parsed as an integer.");
}
cc->OutputSidePackets().Index(0).Set(MakePacket<IntType>(number));
return absl::OkStatus();

View File

@ -239,10 +239,13 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":image_transformation_calculator_cc_proto",
"//mediapipe/framework:packet",
"//mediapipe/framework:timestamp",
"//mediapipe/gpu:scale_mode_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",

View File

@ -105,7 +105,7 @@ absl::StatusOr<ImageFileProperties> GetImageFileProperites(
} // namespace
// Calculator to extract EXIF information from an image file. The input is
// a std::string containing raw byte data from a file, and the output is an
// a string containing raw byte data from a file, and the output is an
// ImageFileProperties proto object with the relevant fields filled in.
// The calculator accepts the input as a stream or a side packet, and can output
// the result as a stream or a side packet. The calculator checks that if an

View File

@ -16,10 +16,13 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/timestamp.h"
#include "mediapipe/gpu/scale_mode.pb.h"
#if !MEDIAPIPE_DISABLE_GPU
@ -52,6 +55,7 @@ namespace mediapipe {
namespace {
constexpr char kImageFrameTag[] = "IMAGE";
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
constexpr char kVideoPrestreamTag[] = "VIDEO_PRESTREAM";
int RotationModeToDegrees(mediapipe::RotationMode_Mode rotation) {
switch (rotation) {
@ -122,6 +126,12 @@ mediapipe::ScaleMode_Mode ParseScaleMode(
// provided, it overrides the FLIP_VERTICALLY input side packet and/or
// corresponding field in the calculator options.
//
// VIDEO_PRESTREAM (optional): VideoHeader for the input ImageFrames, if
// rotating or scaling the frames, the header width and height will be updated
// appropriately. Note the header is updated only based on dimensions and
// rotations specified as side packets or options, input_stream
// transformations will not update the header.
//
// Output:
// One of the following tags:
// IMAGE - ImageFrame representing the output image.
@ -242,6 +252,21 @@ absl::Status ImageTransformationCalculator::GetContract(
cc->Inputs().Tag("FLIP_VERTICALLY").Set<bool>();
}
RET_CHECK(cc->Inputs().HasTag(kVideoPrestreamTag) ==
cc->Outputs().HasTag(kVideoPrestreamTag))
<< "If VIDEO_PRESTREAM is provided, it must be provided both as an "
"inputs and output stream.";
if (cc->Inputs().HasTag(kVideoPrestreamTag)) {
RET_CHECK(!(cc->Inputs().HasTag("OUTPUT_DIMENSIONS") ||
cc->Inputs().HasTag("ROTATION_DEGREES")))
<< "If specifying VIDEO_PRESTREAM, the transformations that affect the "
"dimensions of the frames (OUTPUT_DIMENSIONS and ROTATION_DEGREES) "
"need to be constant for every frame, meaning they can only be "
"provided in the calculator options or side packets.";
cc->Inputs().Tag(kVideoPrestreamTag).Set<mediapipe::VideoHeader>();
cc->Outputs().Tag(kVideoPrestreamTag).Set<mediapipe::VideoHeader>();
}
if (cc->InputSidePackets().HasTag("OUTPUT_DIMENSIONS")) {
cc->InputSidePackets().Tag("OUTPUT_DIMENSIONS").Set<DimensionsPacketType>();
}
@ -326,6 +351,24 @@ absl::Status ImageTransformationCalculator::Open(CalculatorContext* cc) {
}
absl::Status ImageTransformationCalculator::Process(CalculatorContext* cc) {
// First update the video header if it is given, based on the rotation and
// dimensions specified as side packets or options. This will only be done
// once, so streaming transformation changes will not be reflected in
// the header.
if (cc->Inputs().HasTag(kVideoPrestreamTag) &&
!cc->Inputs().Tag(kVideoPrestreamTag).IsEmpty() &&
cc->Outputs().HasTag(kVideoPrestreamTag)) {
mediapipe::VideoHeader header =
cc->Inputs().Tag(kVideoPrestreamTag).Get<mediapipe::VideoHeader>();
// Update the header's width and height if needed.
ComputeOutputDimensions(header.width, header.height, &header.width,
&header.height);
cc->Outputs()
.Tag(kVideoPrestreamTag)
.AddPacket(mediapipe::MakePacket<mediapipe::VideoHeader>(header).At(
mediapipe::Timestamp::PreStream()));
}
// Override values if specified so.
if (cc->Inputs().HasTag("ROTATION_DEGREES") &&
!cc->Inputs().Tag("ROTATION_DEGREES").IsEmpty()) {
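A minimal sketch of wiring the new tag: stream names and dimensions are invented, and the transform is supplied via options, as the contract above requires for VIDEO_PRESTREAM.
```
// Sketch only: forward VIDEO_PRESTREAM through ImageTransformationCalculator
// so the VideoHeader width/height are updated together with the frames.
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig MakeScaleConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "input_frames"
    input_stream: "input_header"
    node {
      calculator: "ImageTransformationCalculator"
      input_stream: "IMAGE:input_frames"
      input_stream: "VIDEO_PRESTREAM:input_header"
      output_stream: "IMAGE:scaled_frames"
      output_stream: "VIDEO_PRESTREAM:scaled_header"
      options: {
        [mediapipe.ImageTransformationCalculatorOptions.ext]: {
          output_width: 320
          output_height: 240
        }
      }
    }
  )pb");
}
```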

View File

@ -22,9 +22,9 @@
namespace mediapipe {
// Takes in an encoded image std::string, decodes it by OpenCV, and converts to
// an ImageFrame. Note that this calculator only supports grayscale and RGB
// images for now.
// Takes in an encoded image string, decodes it by OpenCV, and converts to an
// ImageFrame. Note that this calculator only supports grayscale and RGB images
// for now.
//
// Example config:
// node {

View File

@ -20,8 +20,8 @@
namespace mediapipe {
// Takes in a std::string, draws the text std::string by cv::putText(), and
// outputs an ImageFrame.
// Takes in a string, draws the text string by cv::putText(), and outputs an
// ImageFrame.
//
// Example config:
// node {

View File

@ -553,7 +553,6 @@ absl::Status ScaleImageCalculator::Process(CalculatorContext* cc) {
}
}
cc->GetCounter("Inputs")->Increment();
const ImageFrame* image_frame;
ImageFrame converted_image_frame;
if (input_format_ == ImageFormat::YCBCR420P) {

View File

@ -183,22 +183,22 @@ absl::Status SegmentationSmoothingCalculator::Close(CalculatorContext* cc) {
absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
// Setup source images.
const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
const cv::Mat current_mat = mediapipe::formats::MatView(&current_frame);
RET_CHECK_EQ(current_mat.type(), CV_32FC1)
auto current_mat = mediapipe::formats::MatView(&current_frame);
RET_CHECK_EQ(current_mat->type(), CV_32FC1)
<< "Only 1-channel float input image is supported.";
const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
const cv::Mat previous_mat = mediapipe::formats::MatView(&previous_frame);
RET_CHECK_EQ(previous_mat.type(), current_mat.type())
<< "Warning: mixing input format types: " << previous_mat.type()
<< " != " << previous_mat.type();
auto previous_mat = mediapipe::formats::MatView(&previous_frame);
RET_CHECK_EQ(previous_mat->type(), current_mat->type())
<< "Warning: mixing input format types: " << previous_mat->type()
<< " != " << previous_mat->type();
RET_CHECK_EQ(current_mat.rows, previous_mat.rows);
RET_CHECK_EQ(current_mat.cols, previous_mat.cols);
RET_CHECK_EQ(current_mat->rows, previous_mat->rows);
RET_CHECK_EQ(current_mat->cols, previous_mat->cols);
// Setup destination image.
auto output_frame = std::make_shared<ImageFrame>(
current_frame.image_format(), current_mat.cols, current_mat.rows);
current_frame.image_format(), current_mat->cols, current_mat->rows);
cv::Mat output_mat = mediapipe::formats::MatView(output_frame.get());
output_mat.setTo(cv::Scalar(0));
@ -233,8 +233,8 @@ absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
// Write directly to the first channel of output.
for (int i = 0; i < output_mat.rows; ++i) {
float* out_ptr = output_mat.ptr<float>(i);
const float* curr_ptr = current_mat.ptr<float>(i);
const float* prev_ptr = previous_mat.ptr<float>(i);
const float* curr_ptr = current_mat->ptr<float>(i);
const float* prev_ptr = previous_mat->ptr<float>(i);
for (int j = 0; j < output_mat.cols; ++j) {
const float new_mask_value = curr_ptr[j];
const float prev_mask_value = prev_ptr[j];

View File

@ -116,8 +116,8 @@ void RunGraph(Packet curr_packet, Packet prev_packet, bool use_gpu, float ratio,
ASSERT_EQ(1, output_packets.size());
Image result_image = output_packets[0].Get<Image>();
cv::Mat result_mat = formats::MatView(&result_image);
result_mat.copyTo(*result);
auto result_mat = formats::MatView(&result_image);
result_mat->copyTo(*result);
// Fully close graph at end, otherwise calculator+Images are destroyed
// after calling WaitUntilDone().
@ -135,10 +135,10 @@ void RunTest(bool use_gpu, float mix_ratio, cv::Mat& test_result) {
Packet curr_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
ImageFormat::VEC32F1, curr_mat.size().width, curr_mat.size().height));
curr_mat.copyTo(formats::MatView(&(curr_packet.Get<Image>())));
curr_mat.copyTo(*formats::MatView(&(curr_packet.Get<Image>())));
Packet prev_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
ImageFormat::VEC32F1, prev_mat.size().width, prev_mat.size().height));
prev_mat.copyTo(formats::MatView(&(prev_packet.Get<Image>())));
prev_mat.copyTo(*formats::MatView(&(prev_packet.Get<Image>())));
cv::Mat result;
RunGraph(curr_packet, prev_packet, use_gpu, mix_ratio, &result);

View File

@ -84,14 +84,15 @@ cc_library(
tags = ["nomac"], # config problem with cpuinfo via TF
deps = [
"inference_calculator_interface",
"//mediapipe/framework/deps:file_path",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/util/tflite:config",
"//mediapipe/util/tflite:tflite_gpu_runner",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program",
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader",
],
alwayslink = 1,
)
@ -154,7 +155,7 @@ cc_library(
cc_library(
name = "inference_calculator_gl_if_compute_shader_available",
deps = select({
deps = selects.with_or({
":compute_shader_unavailable": [],
"//conditions:default": [":inference_calculator_gl"],
}),
@ -303,7 +304,7 @@ cc_library(
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/formats/object_detection:anchor_cc_proto",
"//mediapipe/framework/port:ret_check",
] + select({
] + selects.with_or({
":compute_shader_unavailable": [],
"//conditions:default": [":tensors_to_detections_calculator_gpu_deps"],
}),
@ -560,7 +561,7 @@ cc_library(
cc_library(
name = "image_to_tensor_calculator_gpu_deps",
deps = select({
deps = selects.with_or({
"//mediapipe:android": [
":image_to_tensor_converter_gl_buffer",
"//mediapipe/gpu:gl_calculator_helper",
@ -684,7 +685,7 @@ cc_library(
name = "image_to_tensor_converter_gl_buffer",
srcs = ["image_to_tensor_converter_gl_buffer.cc"],
hdrs = ["image_to_tensor_converter_gl_buffer.h"],
deps = ["//mediapipe/framework:port"] + select({
deps = ["//mediapipe/framework:port"] + selects.with_or({
"//mediapipe:apple": [],
"//conditions:default": [
":image_to_tensor_converter",

View File

@ -49,7 +49,6 @@
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_texture.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
@ -142,11 +141,24 @@ class ImageToTensorCalculator : public Node {
const auto& options =
cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
RET_CHECK(options.has_output_tensor_float_range())
RET_CHECK(options.has_output_tensor_float_range() ||
options.has_output_tensor_int_range())
<< "Output tensor range is required.";
if (options.has_output_tensor_float_range()) {
RET_CHECK_LT(options.output_tensor_float_range().min(),
options.output_tensor_float_range().max())
<< "Valid output tensor range is required.";
<< "Valid output float tensor range is required.";
}
if (options.has_output_tensor_int_range()) {
RET_CHECK_LT(options.output_tensor_int_range().min(),
options.output_tensor_int_range().max())
<< "Valid output int tensor range is required.";
RET_CHECK_GE(options.output_tensor_int_range().min(), 0)
<< "The minimum of the output int tensor range must be non-negative.";
RET_CHECK_LE(options.output_tensor_int_range().max(), 255)
<< "The maximum of the output int tensor range must be less than or "
"equal to 255.";
}
RET_CHECK_GT(options.output_tensor_width(), 0)
<< "Valid output tensor width is required.";
RET_CHECK_GT(options.output_tensor_height(), 0)
@ -175,9 +187,15 @@ class ImageToTensorCalculator : public Node {
options_ = cc->Options<mediapipe::ImageToTensorCalculatorOptions>();
output_width_ = options_.output_tensor_width();
output_height_ = options_.output_tensor_height();
range_min_ = options_.output_tensor_float_range().min();
range_max_ = options_.output_tensor_float_range().max();
is_int_output_ = options_.has_output_tensor_int_range();
range_min_ =
is_int_output_
? static_cast<float>(options_.output_tensor_int_range().min())
: options_.output_tensor_float_range().min();
range_max_ =
is_int_output_
? static_cast<float>(options_.output_tensor_int_range().max())
: options_.output_tensor_float_range().max();
return absl::OkStatus();
}
@ -225,7 +243,7 @@ class ImageToTensorCalculator : public Node {
}
// Lazy initialization of the GPU or CPU converter.
MP_RETURN_IF_ERROR(InitConverterIfNecessary(cc, image->UsesGpu()));
MP_RETURN_IF_ERROR(InitConverterIfNecessary(cc, *image.get()));
ASSIGN_OR_RETURN(Tensor tensor,
(image->UsesGpu() ? gpu_converter_ : cpu_converter_)
@ -283,9 +301,15 @@ class ImageToTensorCalculator : public Node {
}
}
absl::Status InitConverterIfNecessary(CalculatorContext* cc, bool use_gpu) {
absl::Status InitConverterIfNecessary(CalculatorContext* cc,
const Image& image) {
// Lazy initialization of the GPU or CPU converter.
if (use_gpu) {
if (image.UsesGpu()) {
if (is_int_output_) {
return absl::UnimplementedError(
"ImageToTensorConverter for the input GPU image currently doesn't "
"support quantization.");
}
if (!gpu_converter_) {
#if !MEDIAPIPE_DISABLE_GPU
#if MEDIAPIPE_METAL_ENABLED
@ -296,9 +320,17 @@ class ImageToTensorCalculator : public Node {
CreateImageToGlBufferTensorConverter(
cc, DoesGpuInputStartAtBottom(), GetBorderMode()));
#else
ASSIGN_OR_RETURN(gpu_converter_,
// Check whether the underlying storage object is a GL texture.
if (image.GetGpuBuffer()
.internal_storage<mediapipe::GlTextureBuffer>()) {
ASSIGN_OR_RETURN(
gpu_converter_,
CreateImageToGlTextureTensorConverter(
cc, DoesGpuInputStartAtBottom(), GetBorderMode()));
} else {
return absl::UnimplementedError(
"ImageToTensorConverter for the input GPU image is unavailable.");
}
#endif // MEDIAPIPE_METAL_ENABLED
#endif // !MEDIAPIPE_DISABLE_GPU
}
@ -306,7 +338,10 @@ class ImageToTensorCalculator : public Node {
if (!cpu_converter_) {
#if !MEDIAPIPE_DISABLE_OPENCV
ASSIGN_OR_RETURN(cpu_converter_,
CreateOpenCvConverter(cc, GetBorderMode()));
CreateOpenCvConverter(
cc, GetBorderMode(),
is_int_output_ ? Tensor::ElementType::kUInt8
: Tensor::ElementType::kFloat32));
#else
LOG(FATAL) << "Cannot create image to tensor opencv converter since "
"MEDIAPIPE_DISABLE_OPENCV is defined.";
@ -321,6 +356,7 @@ class ImageToTensorCalculator : public Node {
mediapipe::ImageToTensorCalculatorOptions options_;
int output_width_ = 0;
int output_height_ = 0;
bool is_int_output_ = false;
float range_min_ = 0.0f;
float range_max_ = 1.0f;
};

View File

@ -31,6 +31,14 @@ message ImageToTensorCalculatorOptions {
optional float max = 2;
}
// Range of int values [min, max].
// min, must be strictly less than max.
// Please note that IntRange is supported for CPU tensors only.
message IntRange {
optional int64 min = 1;
optional int64 max = 2;
}
// Pixel extrapolation methods. See @border_mode.
enum BorderMode {
BORDER_UNSPECIFIED = 0;
@ -49,6 +57,7 @@ message ImageToTensorCalculatorOptions {
// Output tensor element range/type image pixels are converted to.
oneof range {
FloatRange output_tensor_float_range = 4;
IntRange output_tensor_int_range = 7;
}
// For CONVENTIONAL mode for OpenGL, input image starts at bottom and needs
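As a usage sketch, a node requesting a quantized uint8 output tensor via the new field could look as follows; the stream names are illustrative and the option structure mirrors the calculator test shown further below.
```
// Sketch only: request an uint8 output tensor via output_tensor_int_range
// (CPU tensors only, per the note in the proto above).
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"

mediapipe::CalculatorGraphConfig MakeUint8TensorConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "input_image"
    node {
      calculator: "ImageToTensorCalculator"
      input_stream: "IMAGE:input_image"
      output_stream: "TENSORS:tensors"
      options: {
        [mediapipe.ImageToTensorCalculatorOptions.ext]: {
          output_tensor_width: 256
          output_tensor_height: 256
          keep_aspect_ratio: true
          output_tensor_int_range { min: 0 max: 255 }
        }
      }
    }
  )pb");
}
```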

View File

@ -61,7 +61,8 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
float range_max, int tensor_width,
int tensor_height, bool keep_aspect,
absl::optional<BorderMode> border_mode,
const mediapipe::NormalizedRect& roi) {
const mediapipe::NormalizedRect& roi,
bool output_int_tensor) {
std::string border_mode_str;
if (border_mode) {
switch (*border_mode) {
@ -73,6 +74,21 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
break;
}
}
std::string output_tensor_range;
if (output_int_tensor) {
output_tensor_range = absl::Substitute(R"(output_tensor_int_range {
min: $0
max: $1
})",
static_cast<int>(range_min),
static_cast<int>(range_max));
} else {
output_tensor_range = absl::Substitute(R"(output_tensor_float_range {
min: $0
max: $1
})",
range_min, range_max);
}
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
absl::Substitute(R"(
input_stream: "input_image"
@ -86,22 +102,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: $0
output_tensor_height: $1
keep_aspect_ratio: $4
output_tensor_float_range {
min: $2
max: $3
}
$5 # border mode
keep_aspect_ratio: $2
$3 # output range
$4 # border mode
}
}
}
)",
/*$0=*/tensor_width,
/*$1=*/tensor_height,
/*$2=*/range_min,
/*$3=*/range_max,
/*$4=*/keep_aspect ? "true" : "false",
/*$5=*/border_mode_str));
/*$2=*/keep_aspect ? "true" : "false",
/*$3=*/output_tensor_range,
/*$4=*/border_mode_str));
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
@ -126,11 +138,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
ASSERT_THAT(tensor_vec, testing::SizeIs(1));
const Tensor& tensor = tensor_vec[0];
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
auto view = tensor.GetCpuReadView();
cv::Mat tensor_mat(tensor_height, tensor_width, CV_32FC3,
cv::Mat tensor_mat;
if (output_int_tensor) {
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kUInt8);
tensor_mat = cv::Mat(tensor_height, tensor_width, CV_8UC3,
const_cast<uint8*>(view.buffer<uint8>()));
} else {
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
tensor_mat = cv::Mat(tensor_height, tensor_width, CV_32FC3,
const_cast<float*>(view.buffer<float>()));
}
cv::Mat result_rgb;
auto transformation =
GetValueRangeTransformation(range_min, range_max, 0.0f, 255.0f).value();
@ -170,16 +189,26 @@ enum class InputType { kImageFrame, kImage };
const std::vector<InputType> kInputTypesToTest = {InputType::kImageFrame,
InputType::kImage};
void RunTest(cv::Mat input, cv::Mat expected_result, float range_min,
float range_max, int tensor_width, int tensor_height,
bool keep_aspect, absl::optional<BorderMode> border_mode,
void RunTest(cv::Mat input, cv::Mat expected_result,
std::vector<float> float_range, std::vector<int> int_range,
int tensor_width, int tensor_height, bool keep_aspect,
absl::optional<BorderMode> border_mode,
const mediapipe::NormalizedRect& roi) {
ASSERT_EQ(2, float_range.size());
ASSERT_EQ(2, int_range.size());
for (auto input_type : kInputTypesToTest) {
RunTestWithInputImagePacket(
input_type == InputType::kImageFrame ? MakeImageFramePacket(input)
: MakeImagePacket(input),
expected_result, range_min, range_max, tensor_width, tensor_height,
keep_aspect, border_mode, roi);
expected_result, float_range[0], float_range[1], tensor_width,
tensor_height, keep_aspect, border_mode, roi,
/*output_int_tensor=*/false);
RunTestWithInputImagePacket(
input_type == InputType::kImageFrame ? MakeImageFramePacket(input)
: MakeImagePacket(input),
expected_result, int_range[0], int_range[1], tensor_width,
tensor_height, keep_aspect, border_mode, roi,
/*output_int_tensor=*/true);
}
}
@ -195,8 +224,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspect) {
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_keep_aspect.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
/*border mode*/ {}, roi);
}
@ -213,8 +242,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectBorderZero) {
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_border_zero.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
BorderMode::kZero, roi);
}
@ -231,7 +260,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectKeepAspectWithRotation) {
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation.png"),
/*range_min=*/0.0f, /*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
BorderMode::kReplicate, roi);
}
@ -249,7 +279,8 @@ TEST(ImageToTensorCalculatorTest,
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_keep_aspect_with_rotation_border_zero.png"),
/*range_min=*/0.0f, /*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/true,
BorderMode::kZero, roi);
}
@ -267,8 +298,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotation) {
GetRgb(
"/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/medium_sub_rect_with_rotation.png"),
/*range_min=*/-1.0f,
/*range_max=*/1.0f,
/*float_range=*/{-1.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false,
BorderMode::kReplicate, roi);
}
@ -285,8 +316,8 @@ TEST(ImageToTensorCalculatorTest, MediumSubRectWithRotationBorderZero) {
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"medium_sub_rect_with_rotation_border_zero.png"),
/*range_min=*/-1.0f,
/*range_max=*/1.0f,
/*float_range=*/{-1.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/256, /*tensor_height=*/256, /*keep_aspect=*/false,
BorderMode::kZero, roi);
}
@ -302,8 +333,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRect) {
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
BorderMode::kReplicate, roi);
}
@ -320,8 +351,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectBorderZero) {
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_border_zero.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/false,
BorderMode::kZero, roi);
}
@ -338,8 +369,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspect) {
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/large_sub_rect_keep_aspect.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kReplicate, roi);
}
@ -356,8 +387,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectBorderZero) {
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_border_zero.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kZero, roi);
}
@ -374,8 +405,8 @@ TEST(ImageToTensorCalculatorTest, LargeSubRectKeepAspectWithRotation) {
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
/*border_mode=*/{}, roi);
}
@ -393,8 +424,8 @@ TEST(ImageToTensorCalculatorTest,
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/"
"large_sub_rect_keep_aspect_with_rotation_border_zero.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/128, /*tensor_height=*/128, /*keep_aspect=*/true,
/*border_mode=*/BorderMode::kZero, roi);
}
@ -410,8 +441,8 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRange) {
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kReplicate, roi);
}
@ -427,8 +458,8 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRangeBorderZero) {
"tensor/testdata/image_to_tensor/input.jpg"),
GetRgb("/mediapipe/calculators/"
"tensor/testdata/image_to_tensor/noop_except_range.png"),
/*range_min=*/0.0f,
/*range_max=*/1.0f,
/*float_range=*/{0.0f, 1.0f},
/*int_range=*/{0, 255},
/*tensor_width=*/64, /*tensor_height=*/128, /*keep_aspect=*/true,
BorderMode::kZero, roi);
}

View File

@ -16,7 +16,7 @@
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <array>
#include <memory>
@ -339,4 +339,4 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

View File

@ -17,7 +17,7 @@
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <memory>
@ -37,6 +37,6 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_TEXTURE_H_

View File

@ -2,7 +2,7 @@
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <array>
#include <memory>
@ -85,4 +85,4 @@ bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context) {
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

View File

@ -3,7 +3,7 @@
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include <array>
#include <memory>
@ -40,6 +40,6 @@ bool IsGlClampToBorderSupported(const mediapipe::GlContext& gl_context);
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_GL_UTILS_H_

View File

@ -1,6 +1,6 @@
#include "mediapipe/framework/port.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include "mediapipe/calculators/tensor/image_to_tensor_converter_gl_utils.h"
#include "mediapipe/framework/port/gtest.h"
@ -46,4 +46,4 @@ TEST(ImageToTensorConverterGlUtilsTest, GlTexParameteriOverrider) {
} // namespace
} // namespace mediapipe
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_20
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

View File

@ -35,7 +35,8 @@ namespace {
class OpenCvProcessor : public ImageToTensorConverter {
public:
OpenCvProcessor(BorderMode border_mode) {
OpenCvProcessor(BorderMode border_mode, Tensor::ElementType tensor_type)
: tensor_type_(tensor_type) {
switch (border_mode) {
case BorderMode::kReplicate:
border_mode_ = cv::BORDER_REPLICATE;
@ -44,6 +45,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
border_mode_ = cv::BORDER_CONSTANT;
break;
}
mat_type_ = tensor_type == Tensor::ElementType::kUInt8 ? CV_8UC3 : CV_32FC3;
}
absl::StatusOr<Tensor> Convert(const mediapipe::Image& input,
@ -56,15 +58,20 @@ class OpenCvProcessor : public ImageToTensorConverter {
absl::StrCat("Only RGBA/RGB formats are supported, passed format: ",
static_cast<uint32_t>(input.image_format())));
}
cv::Mat src = mediapipe::formats::MatView(&input);
auto src = mediapipe::formats::MatView(&input);
constexpr int kNumChannels = 3;
Tensor tensor(
Tensor::ElementType::kFloat32,
Tensor::Shape{1, output_dims.height, output_dims.width, kNumChannels});
Tensor tensor(tensor_type_, Tensor::Shape{1, output_dims.height,
output_dims.width, kNumChannels});
auto buffer_view = tensor.GetCpuWriteView();
cv::Mat dst(output_dims.height, output_dims.width, CV_32FC3,
cv::Mat dst;
if (tensor_type_ == Tensor::ElementType::kUInt8) {
dst = cv::Mat(output_dims.height, output_dims.width, mat_type_,
buffer_view.buffer<uint8>());
} else {
dst = cv::Mat(output_dims.height, output_dims.width, mat_type_,
buffer_view.buffer<float>());
}
const cv::RotatedRect rotated_rect(cv::Point2f(roi.center_x, roi.center_y),
cv::Size2f(roi.width, roi.height),
@ -85,7 +92,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
cv::Mat projection_matrix =
cv::getPerspectiveTransform(src_points, dst_points);
cv::Mat transformed;
cv::warpPerspective(src, transformed, projection_matrix,
cv::warpPerspective(*src, transformed, projection_matrix,
cv::Size(dst_width, dst_height),
/*flags=*/cv::INTER_LINEAR,
/*borderMode=*/border_mode_);
@ -102,19 +109,22 @@ class OpenCvProcessor : public ImageToTensorConverter {
auto transform,
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset);
transformed.convertTo(dst, mat_type_, transform.scale, transform.offset);
return tensor;
}
private:
enum cv::BorderTypes border_mode_;
Tensor::ElementType tensor_type_;
int mat_type_;
};
} // namespace
absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
CalculatorContext* cc, BorderMode border_mode) {
return absl::make_unique<OpenCvProcessor>(border_mode);
CalculatorContext* cc, BorderMode border_mode,
Tensor::ElementType tensor_type) {
return absl::make_unique<OpenCvProcessor>(border_mode, tensor_type);
}
} // namespace mediapipe

View File

@ -25,7 +25,8 @@ namespace mediapipe {
// Creates OpenCV image-to-tensor converter.
absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
CalculatorContext* cc, BorderMode border_mode);
CalculatorContext* cc, BorderMode border_mode,
Tensor::ElementType tensor_type);
} // namespace mediapipe
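
Note: a hypothetical call site, assuming a valid CalculatorContext* cc and the signature declared above, showing how a caller would request a uint8 output tensor instead of the previous float-only behavior.

    ASSIGN_OR_RETURN(std::unique_ptr<ImageToTensorConverter> converter,
                     CreateOpenCvConverter(cc, BorderMode::kZero,
                                           Tensor::ElementType::kUInt8));
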

View File

@ -36,6 +36,7 @@ class InferenceCalculatorSelectorImpl
Subgraph::GetOptions<mediapipe::InferenceCalculatorOptions>(
subgraph_node);
std::vector<absl::string_view> impls;
const bool should_use_gpu =
!options.has_delegate() || // Use GPU delegate if not specified
(options.has_delegate() && options.delegate().has_gpu());

View File

@ -81,6 +81,7 @@ class InferenceCalculatorCpuImpl
Packet<TfLiteModelPtr> model_packet_;
std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteDelegatePtr delegate_;
bool has_quantized_input_;
};
absl::Status InferenceCalculatorCpuImpl::UpdateContract(
@ -109,11 +110,19 @@ absl::Status InferenceCalculatorCpuImpl::Process(CalculatorContext* cc) {
for (int i = 0; i < input_tensors.size(); ++i) {
const Tensor* input_tensor = &input_tensors[i];
auto input_tensor_view = input_tensor->GetCpuReadView();
if (has_quantized_input_) {
// TODO: Support more quantized tensor types.
auto input_tensor_buffer = input_tensor_view.buffer<uint8>();
uint8* local_tensor_buffer = interpreter_->typed_input_tensor<uint8>(i);
std::memcpy(local_tensor_buffer, input_tensor_buffer,
input_tensor->bytes());
} else {
auto input_tensor_buffer = input_tensor_view.buffer<float>();
float* local_tensor_buffer = interpreter_->typed_input_tensor<float>(i);
std::memcpy(local_tensor_buffer, input_tensor_buffer,
input_tensor->bytes());
}
}
// Run inference.
RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk);
@ -167,10 +176,9 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegateAndAllocateTensors(
// AllocateTensors() can be called only after ModifyGraphWithDelegate.
RET_CHECK_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// TODO: Support quantized tensors.
RET_CHECK_NE(
interpreter_->tensor(interpreter_->inputs()[0])->quantization.type,
kTfLiteAffineQuantization);
has_quantized_input_ =
interpreter_->tensor(interpreter_->inputs()[0])->quantization.type ==
kTfLiteAffineQuantization;
return absl::OkStatus();
}
@ -226,7 +234,7 @@ absl::Status InferenceCalculatorCpuImpl::LoadDelegate(CalculatorContext* cc) {
#endif // defined(__EMSCRIPTEN__)
if (use_xnnpack) {
TfLiteXNNPackDelegateOptions xnnpack_opts{};
auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
xnnpack_opts.num_threads =
GetXnnpackNumThreads(opts_has_delegate, opts_delegate);
delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
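
Note: a minimal sketch of why this change matters, assuming the standard TF Lite XNNPACK delegate C API. TfLiteXNNPackDelegateOptionsDefault() populates library defaults (including fields added to the struct later), whereas value-initializing the struct leaves every field at zero.

    // Sketch only; mirrors the pattern above rather than the calculator itself.
    TfLiteXNNPackDelegateOptions opts = TfLiteXNNPackDelegateOptionsDefault();
    opts.num_threads = 2;  // override only what is needed, keep library defaults elsewhere
    TfLiteDelegate* delegate = TfLiteXNNPackDelegateCreate(&opts);
    // ... interpreter->ModifyGraphWithDelegate(delegate) ...
    TfLiteXNNPackDelegateDelete(delegate);
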

View File

@ -154,8 +154,9 @@ TEST_P(InferenceCalculatorTest, TestFaceDetection) {
detection_packets[0].Get<std::vector<Detection>>();
#if !defined(MEDIAPIPE_PROTO_LITE)
// Approximately is not available with lite protos (b/178137094).
EXPECT_THAT(dets,
ElementsAre(Approximately(EqualsProto(expected_detection))));
  constexpr float kEpsilon = 0.001;
  EXPECT_THAT(dets, ElementsAre(Approximately(EqualsProto(expected_detection),
                                              kEpsilon)));
#endif
}

View File

@ -59,8 +59,6 @@ class InferenceCalculatorGlImpl
// TfLite requires us to keep the model alive as long as the interpreter is.
Packet<TfLiteModelPtr> model_packet_;
std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteDelegatePtr delegate_;
#if MEDIAPIPE_TFLITE_GL_INFERENCE
mediapipe::GlCalculatorHelper gpu_helper_;
@ -72,6 +70,9 @@ class InferenceCalculatorGlImpl
tflite_gpu_runner_usage_;
#endif // MEDIAPIPE_TFLITE_GL_INFERENCE
TfLiteDelegatePtr delegate_;
std::unique_ptr<tflite::Interpreter> interpreter_;
#if MEDIAPIPE_TFLITE_GPU_SUPPORTED
std::vector<Tensor::Shape> output_shapes_;
std::vector<std::unique_ptr<Tensor>> gpu_buffers_in_;
@ -252,12 +253,17 @@ absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
gpu_buffers_in_.clear();
gpu_buffers_out_.clear();
return absl::OkStatus();
}));
}
// Delegate must outlive the interpreter, hence the order is important.
interpreter_ = nullptr;
delegate_ = nullptr;
return absl::OkStatus();
}));
} else {
// Delegate must outlive the interpreter, hence the order is important.
interpreter_ = nullptr;
delegate_ = nullptr;
}
return absl::OkStatus();
}

View File

@ -266,6 +266,7 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU(
auto raw_box_tensor = &input_tensors[0];
RET_CHECK_EQ(raw_box_tensor->shape().dims.size(), 3);
RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1);
RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
RET_CHECK_EQ(raw_box_tensor->shape().dims[1], num_boxes_);
RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_coords_);
auto raw_score_tensor = &input_tensors[1];
@ -385,6 +386,7 @@ absl::Status TensorsToDetectionsCalculator::ProcessGPU(
CalculatorContext* cc, std::vector<Detection>* output_detections) {
const auto& input_tensors = *kInTensors(cc);
RET_CHECK_GE(input_tensors.size(), 2);
RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
#ifndef MEDIAPIPE_DISABLE_GL_COMPUTE
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &input_tensors, &cc,
@ -563,7 +565,6 @@ absl::Status TensorsToDetectionsCalculator::LoadOptions(CalculatorContext* cc) {
// Get calculator options specified in the graph.
options_ = cc->Options<::mediapipe::TensorsToDetectionsCalculatorOptions>();
RET_CHECK(options_.has_num_classes());
RET_CHECK(options_.has_num_boxes());
RET_CHECK(options_.has_num_coords());
num_classes_ = options_.num_classes();
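
Note: with the has_num_boxes() check dropped from LoadOptions, num_boxes is now validated at Process() time via the RET_CHECK_GT additions above. A hypothetical options setup (assuming the generated proto setters) still has to provide it:

    mediapipe::TensorsToDetectionsCalculatorOptions options;
    options.set_num_classes(1);
    options.set_num_coords(16);
    options.set_num_boxes(896);  // must be > 0, or Process() fails the new RET_CHECK_GT
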

View File

@ -355,9 +355,10 @@ absl::Status TensorsToSegmentationCalculator::ProcessCpu(
std::shared_ptr<ImageFrame> mask_frame = std::make_shared<ImageFrame>(
ImageFormat::VEC32F1, output_width, output_height);
std::unique_ptr<Image> output_mask = absl::make_unique<Image>(mask_frame);
cv::Mat output_mat = formats::MatView(output_mask.get());
auto output_mat = formats::MatView(output_mask.get());
// Upsample small mask into output.
cv::resize(small_mask_mat, output_mat, cv::Size(output_width, output_height));
cv::resize(small_mask_mat, *output_mat,
cv::Size(output_width, output_height));
cc->Outputs().Tag(kMaskTag).Add(output_mask.release(), cc->InputTimestamp());
return absl::OkStatus();

View File

@ -334,6 +334,7 @@ cc_library(
":image_frame_to_tensor_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:core_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
] + select({

View File

@ -17,6 +17,7 @@
#include "mediapipe/calculators/tensorflow/image_frame_to_tensor_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
@ -32,7 +33,10 @@ namespace {
// Convert the ImageFrame into Tensor with floating point value type.
// The value will be normalized based on mean and stddev.
std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor(
const ImageFrame& image_frame, float mean, float stddev) {
// const ImageFrame& image_frame, float mean, float stddev) {
const ImageFrame& image_frame,
const mediapipe::proto_ns::RepeatedField<float>& mean,
const mediapipe::proto_ns::RepeatedField<float>& stddev) {
const int cols = image_frame.Width();
const int rows = image_frame.Height();
const int channels = image_frame.NumberOfChannels();
@ -45,7 +49,20 @@ std::unique_ptr<tf::Tensor> ImageFrameToNormalizedTensor(
for (int row = 0; row < rows; ++row) {
for (int col = 0; col < cols; ++col) {
for (int channel = 0; channel < channels; ++channel) {
tensor_data(row, col, channel) = (pixel[channel] - mean) / stddev;
float mean_value = 0;
if (mean.size() > 1) {
mean_value = mean[channel];
} else if (!mean.empty()) {
mean_value = mean[0];
}
float stddev_value = 1;
if (stddev.size() > 1) {
stddev_value = stddev[channel];
} else if (!stddev.empty()) {
stddev_value = stddev[0];
}
tensor_data(row, col, channel) =
(pixel[channel] - mean_value) / stddev_value;
}
pixel += channels;
}
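
Note: a small standalone sketch (hypothetical helper, not part of the calculator) of the broadcast rule used in the loop above: multiple values normalize per channel, a single value is broadcast to every channel, and an unset field falls back to the defaults of 0 for mean and 1 for stddev.

    float ValueForChannel(const mediapipe::proto_ns::RepeatedField<float>& values,
                          int channel, float default_value) {
      if (values.size() > 1) return values[channel];  // per-channel normalization
      if (!values.empty()) return values[0];          // one value broadcast to all channels
      return default_value;                           // field unset: 0 for mean, 1 for stddev
    }
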
@ -126,7 +143,18 @@ absl::Status ImageFrameToTensorCalculator::Process(CalculatorContext* cc) {
const tf::DataType data_type = options_.data_type();
RET_CHECK_EQ(data_type, tf::DT_FLOAT)
<< "Unsupported data type " << data_type;
RET_CHECK_GT(options_.stddev(), 0.0f);
RET_CHECK_GT(options_.stddev().size(), 0) << "You must set a stddev.";
RET_CHECK_GT(options_.stddev()[0], 0.0f) << "The stddev cannot be zero.";
if (options_.stddev().size() > 1) {
RET_CHECK_EQ(options_.stddev().size(), video_frame.NumberOfChannels())
<< "If specifying multiple stddev normalization values, "
<< "the number must match the number of image channels.";
}
if (options_.mean().size() > 1) {
RET_CHECK_EQ(options_.mean().size(), video_frame.NumberOfChannels())
<< "If specifying multiple mean normalization values, "
<< "the number must match the number of image channels.";
}
tensor = ImageFrameToNormalizedTensor(video_frame, options_.mean(),
options_.stddev());
} else {

View File

@ -32,6 +32,6 @@ message ImageFrameToTensorCalculatorOptions {
// If set, the output tensor T is equal to (F - mean * J) / stddev, where F
// and J are the input image frame and the all-ones matrix of the same size,
// respectively. Otherwise, T is equal to F.
optional float mean = 2;
optional float stddev = 3;
repeated float mean = 2;
repeated float stddev = 3;
}

View File

@ -454,4 +454,32 @@ TEST_F(ImageFrameToTensorCalculatorTest, FixedRGBFrameWithMeanAndStddev) {
EXPECT_EQ(actual[2], 127.0f / 128.0f); // (255 - 128) / 128
}
TEST_F(ImageFrameToTensorCalculatorTest, FixedRGBFrameWithRepeatMeanAndStddev) {
runner_ = ::absl::make_unique<CalculatorRunner>(
"ImageFrameToTensorCalculator",
"[mediapipe.ImageFrameToTensorCalculatorOptions.ext]"
"{data_type:DT_FLOAT mean:128.0 mean:128.0 mean:128.0 "
" stddev:128.0 stddev:128.0 stddev:128.0}",
1, 1, 0);
// Create a single pixel image of fixed color #0080ff.
auto image_frame = ::absl::make_unique<ImageFrame>(ImageFormat::SRGB, 1, 1);
const uint8 color[] = {0, 128, 255};
SetToColor<uint8>(color, image_frame.get());
runner_->MutableInputs()->Index(0).packets.push_back(
Adopt(image_frame.release()).At(Timestamp(0)));
MP_ASSERT_OK(runner_->Run());
const auto& tensor = runner_->Outputs().Index(0).packets[0].Get<tf::Tensor>();
EXPECT_EQ(tensor.dtype(), tf::DT_FLOAT);
ASSERT_EQ(tensor.dims(), 3);
EXPECT_EQ(tensor.shape().dim_size(0), 1);
EXPECT_EQ(tensor.shape().dim_size(1), 1);
EXPECT_EQ(tensor.shape().dim_size(2), 3);
const float* actual = tensor.flat<float>().data();
EXPECT_EQ(actual[0], -1.0f); // ( 0 - 128) / 128
EXPECT_EQ(actual[1], 0.0f); // (128 - 128) / 128
EXPECT_EQ(actual[2], 127.0f / 128.0f); // (255 - 128) / 128
}
} // namespace mediapipe

View File

@ -70,10 +70,10 @@ const int kNumCoordsPerBox = 4;
// image/understanding/object_detection/export_inference_graph.py
//
// By default, the output Detections store label ids (integers) for each
// detection. Optionally, a label map (of the form std::map<int, std::string>
// detection. Optionally, a label map (of the form std::map<int, string>
// mapping label ids to label names as strings) can be made available as an
// input side packet, in which case the output Detections store
// labels as their associated std::string provided by the label map.
// labels as their associated string provided by the label map.
//
// Usage example:
// node {

View File

@ -59,7 +59,7 @@ namespace mpms = mediapipe::mediasequence;
// bounding boxes from vector<Detections>, and streams with the
// "FLOAT_FEATURE_${NAME}" pattern, which stores the values from vector<float>'s
// associated with the name ${NAME}. "KEYPOINTS" stores a map of 2D keypoints
// from flat_hash_map<std::string, vector<pair<float, float>>>. "IMAGE_${NAME}",
// from flat_hash_map<string, vector<pair<float, float>>>. "IMAGE_${NAME}",
// "BBOX_${NAME}", and "KEYPOINTS_${NAME}" will also store prefixed versions of
// each stream, which allows for multiple image streams to be included. However,
// the default names are supported by more tools.

View File

@ -28,7 +28,7 @@
// output_side_packet: "SEQUENCE_EXAMPLE:sequence_example"
// }
//
// Example converting to std::string in Close():
// Example converting to string in Close():
// node {
// calculator: "StringToSequenceExampleCalculator"
// input_side_packet: "SEQUENCE_EXAMPLE:sequence_example"

View File

@ -302,9 +302,8 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
<< "To use recurrent_tag_pairs, batch_size must be 1.";
for (const auto& tag_pair : options_.recurrent_tag_pair()) {
const std::vector<std::string> tags = absl::StrSplit(tag_pair, ':');
RET_CHECK_EQ(tags.size(), 2)
<< "recurrent_tag_pair must be a colon "
"separated std::string with two components: "
RET_CHECK_EQ(tags.size(), 2) << "recurrent_tag_pair must be a colon "
"separated string with two components: "
<< tag_pair;
RET_CHECK(mediapipe::ContainsKey(tag_to_tensor_map_, tags[0]))
<< "Can't find tag '" << tags[0] << "' in signature "

View File

@ -86,7 +86,7 @@ class TensorFlowSessionFromFrozenGraphCalculator : public CalculatorBase {
cc->InputSidePackets()
.Tag(kStringModelFilePathTag)
.Set<std::string>(
// Filename of std::string model.
// Filename of string model.
);
}
cc->OutputSidePackets()

View File

@ -84,7 +84,7 @@ class TensorFlowSessionFromFrozenGraphGenerator : public PacketGenerator {
} else if (input_side_packets->HasTag(kStringModelFilePathTag)) {
input_side_packets->Tag(kStringModelFilePathTag)
.Set<std::string>(
// Filename of std::string model.
// Filename of string model.
);
}
output_side_packets->Tag(kSessionTag)

View File

@ -69,6 +69,8 @@ const std::string MaybeConvertSignatureToTag(
[](unsigned char c) { return std::toupper(c); });
output = absl::StrReplaceAll(output, {{"/", "_"}});
output = absl::StrReplaceAll(output, {{"-", "_"}});
output = absl::StrReplaceAll(output, {{".", "_"}});
LOG(INFO) << "Renamed TAG from: " << name << " to " << output;
return output;
} else {
return name;
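
Note: with '.' now replaced alongside '/' and '-', a signature name maps to a tag roughly as in this standalone sketch (hypothetical helper, assuming <algorithm>, <cctype>, and absl/strings/str_replace.h; the real code applies the replacements in separate calls).

    std::string SignatureToTag(std::string name) {
      std::transform(name.begin(), name.end(), name.begin(),
                     [](unsigned char c) { return std::toupper(c); });
      return absl::StrReplaceAll(name, {{"/", "_"}, {"-", "_"}, {".", "_"}});
    }
    // e.g. "serving_default/input.image" -> "SERVING_DEFAULT_INPUT_IMAGE"
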

View File

@ -71,6 +71,8 @@ const std::string MaybeConvertSignatureToTag(
[](unsigned char c) { return std::toupper(c); });
output = absl::StrReplaceAll(output, {{"/", "_"}});
output = absl::StrReplaceAll(output, {{"-", "_"}});
output = absl::StrReplaceAll(output, {{".", "_"}});
LOG(INFO) << "Renamed TAG from: " << name << " to " << output;
return output;
} else {
return name;

View File

@ -939,7 +939,7 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
#if !defined(MEDIAPIPE_EDGE_TPU)
if (use_xnnpack) {
TfLiteXNNPackDelegateOptions xnnpack_opts{};
auto xnnpack_opts = TfLiteXNNPackDelegateOptionsDefault();
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts);
delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts),
&TfLiteXNNPackDelegateDelete);

View File

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "absl/strings/str_format.h"
@ -558,7 +559,7 @@ uniform ivec2 out_size;
const int output_layer_index = int($1);
const float combine_with_previous_ratio = float($2);
// Will be replaced with either '#define READ_PREVIOUS' or empty std::string
// Will be replaced with either '#define READ_PREVIOUS' or empty string
$3 //DEFINE_READ_PREVIOUS
void main() {

View File

@ -51,6 +51,7 @@ class LocalFilePatternContentsCalculator : public CalculatorBase {
cc->InputSidePackets().Tag(kFileDirectoryTag).Get<std::string>(),
cc->InputSidePackets().Tag(kFileSuffixTag).Get<std::string>(),
&filenames_));
std::sort(filenames_.begin(), filenames_.end());
return absl::OkStatus();
}

View File

@ -129,8 +129,8 @@ TEST(PacketFrequencyCalculatorTest, MultiPacketTest) {
// Tests packet frequency with multiple input/output streams.
TEST(PacketFrequencyCalculatorTest, MultiStreamTest) {
// Setup the calculator runner and provide strings as input on all streams
// (note that it doesn't have to be std::string; the calculator can take any
// type as input).
// (note that it doesn't have to be string; the calculator can take any type
// as input).
CalculatorRunner runner(GetNodeWithMultipleStreams());
// Packet 1 on stream 1.

View File

@ -37,6 +37,13 @@ RenderAnnotation::Rectangle* NewRect(
annotation->mutable_color()->set_b(options.color().b());
annotation->set_thickness(options.thickness());
if (options.has_top_left_thickness()) {
CHECK(!options.oval());
CHECK(!options.filled());
annotation->mutable_rectangle()->set_top_left_thickness(
options.top_left_thickness());
}
return options.oval() ? options.filled()
? annotation->mutable_filled_oval()
->mutable_oval()
@ -136,6 +143,11 @@ absl::Status RectToRenderDataCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<RectToRenderDataCalculatorOptions>();
if (options_.has_top_left_thickness()) {
// Filled and oval don't support top_left_thickness.
RET_CHECK(!options_.filled());
RET_CHECK(!options_.oval());
}
return absl::OkStatus();
}

View File

@ -35,4 +35,8 @@ message RectToRenderDataCalculatorOptions {
// Whether the rendered rectangle should be an oval.
optional bool oval = 4 [default = false];
// Radius of top left corner circle. Only supported for oval=false,
// filled=false.
optional double top_left_thickness = 5;
}
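
Note: a hypothetical usage, assuming the generated proto setters. The rounded top-left corner only applies to unfilled, non-oval rectangles, which the calculator now enforces with the RET_CHECKs shown above.

    mediapipe::RectToRenderDataCalculatorOptions options;
    options.set_oval(false);    // required: top_left_thickness is rejected for ovals
    options.set_filled(false);  // required: and for filled rectangles
    options.set_top_left_thickness(10.0);
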

View File

@ -48,8 +48,8 @@ constexpr char kTopKIndexesTag[] = "TOP_K_INDEXES";
constexpr char kScoresTag[] = "SCORES";
// A calculator that takes a vector of scores and returns the indexes, scores,
// labels of the top k elements, classification protos, and summary std::string
// (in csv format).
// labels of the top k elements, classification protos, and summary string (in
// csv format).
//
// Usage example:
// node {

View File

@ -76,7 +76,7 @@ constexpr char kTrackingTag[] = "TRACKING";
// IMAGE_SIZE: Input image dimension.
// TRACKED_BOXES : input box tracking result (proto TimedBoxProtoList) from
// BoxTrackerCalculator.
// ADD_INDEX: Optional std::string containing binary format proto of type
// ADD_INDEX: Optional string containing binary format proto of type
// BoxDetectorIndex. Used for adding target index to the detector
// search index during runtime.
// CANCEL_OBJECT_ID: Optional id of box to be removed. This is recommended
@ -91,8 +91,7 @@ constexpr char kTrackingTag[] = "TRACKING";
// BOXES: Optional output stream of type TimedBoxProtoList for each lost box.
//
// Input side packets:
// INDEX_PROTO_STRING: Optional std::string containing binary format proto of
// type
// INDEX_PROTO_STRING: Optional string containing binary format proto of type
// BoxDetectorIndex. Used for initializing box_detector
// with predefined template images.
// FRAME_ALIGNMENT: Optional integer to indicate alignment_boundary for

View File

@ -15,6 +15,7 @@
#include <stdio.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
@ -78,7 +79,7 @@ const char kOptionsTag[] = "OPTIONS";
// TrackingData and added to current set of tracked boxes.
// This is recommended to be used with SyncSetInputStreamHandler.
// START_POS_PROTO_STRING: Same as START_POS, but is in the form of serialized
// protobuffer std::string. When both START_POS and
// protobuffer string. When both START_POS and
// START_POS_PROTO_STRING are present, START_POS is used. Suggest
// to specify only one of them.
// RESTART_POS: Same as START_POS, but exclusively for receiving detection
@ -99,7 +100,7 @@ const char kOptionsTag[] = "OPTIONS";
// can be in arbitrary order.
// Use with SyncSetInputStreamHandler in streaming mode only.
// RA_TRACK_PROTO_STRING: Same as RA_TRACK, but is in the form of serialized
// protobuffer std::string. When both RA_TRACK and
// protobuffer string. When both RA_TRACK and
// RA_TRACK_PROTO_STRING are present, RA_TRACK is used. Suggest
// to specify only one of them.
//

View File

@ -15,6 +15,7 @@
#include <cmath>
#include <fstream>
#include <memory>
#include <string>
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"

View File

@ -79,7 +79,7 @@ ImageFormat::Format GetImageFormat(int num_channels) {
// to be saved, specify an output side packet with tag "SAVED_AUDIO_PATH".
// The calculator will call FFmpeg binary to save audio tracks as an aac file.
// If the audio tracks can't be extracted by FFmpeg, the output side packet
// will contain an empty std::string.
// will contain an empty string.
//
// Example config:
// node {

View File

@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@ -10,6 +10,9 @@
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<!-- For profiling -->
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<application
android:allowBackup="true"
android:icon="@mipmap/ic_launcher"

View File

@ -40,6 +40,7 @@ android_binary(
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/holistic_landmark:hand_recrop.tflite",
"//mediapipe/modules/pose_detection:pose_detection.tflite",

View File

@ -80,6 +80,7 @@ cc_library(
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
],
alwayslink = 1,
)

View File

@ -15,6 +15,7 @@
#include <algorithm>
#include <memory>
#include "absl/status/status.h"
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator.pb.h"
#include "mediapipe/examples/desktop/autoflip/calculators/content_zooming_calculator_state.h"
@ -41,6 +42,7 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
// Can be used to control whether an animated zoom should actually be performed
// (configured through option us_to_first_rect). If provided, a non-zero integer
// will allow the animated zoom to be used when the first detections arrive.
// Applies to first detection only.
constexpr char kAnimateZoom[] = "ANIMATE_ZOOM";
// Can be used to control the maximum zoom; note that it is re-evaluated only
// upon change of input resolution. A value of 100 disables zooming and is the
@ -112,6 +114,16 @@ class ContentZoomingCalculator : public CalculatorBase {
int* pan_offset, int* height);
// Sets max_frame_value_ and target_aspect_
absl::Status UpdateAspectAndMax();
// Smooth camera path
absl::Status SmoothAndClampPath(int target_width, int target_height,
float path_width, float path_height,
float* path_offset_x, float* path_offset_y);
// Compute box containing all detections.
absl::Status GetDetectionsBox(mediapipe::CalculatorContext* cc, float* xmin,
float* xmax, float* ymin, float* ymax,
bool* only_required_found,
bool* has_detections);
ContentZoomingCalculatorOptions options_;
// Detection frame width/height.
int frame_height_;
@ -537,68 +549,13 @@ absl::Status ContentZoomingCalculator::Process(
UpdateForResolutionChange(cc, frame_width, frame_height));
}
bool only_required_found = false;
// Compute the box that contains all "is_required" detections.
float xmin = 1, ymin = 1, xmax = 0, ymax = 0;
if (cc->Inputs().HasTag(kSalientRegions)) {
auto detection_set = cc->Inputs().Tag(kSalientRegions).Get<DetectionSet>();
for (const auto& region : detection_set.detections()) {
if (!region.only_required()) {
continue;
}
only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
region, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax));
}
}
if (cc->Inputs().HasTag(kDetections)) {
if (cc->Inputs().Tag(kDetections).IsEmpty()) {
if (last_only_required_detection_ == 0) {
// If no detections are available and we never had any,
// simply return the full-image rectangle as crop-rect.
if (cc->Outputs().HasTag(kCropRect)) {
auto default_rect = absl::make_unique<mediapipe::Rect>();
default_rect->set_x_center(frame_width_ / 2);
default_rect->set_y_center(frame_height_ / 2);
default_rect->set_width(frame_width_);
default_rect->set_height(frame_height_);
cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
Timestamp(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
default_rect->set_x_center(0.5);
default_rect->set_y_center(0.5);
default_rect->set_width(1.0);
default_rect->set_height(1.0);
cc->Outputs()
.Tag(kNormalizedCropRect)
.Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
}
// Also provide a first crop rect: in this case a zero-sized one.
if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs()
.Tag(kFirstCropRect)
.Add(new mediapipe::NormalizedRect(),
Timestamp(cc->InputTimestamp()));
}
return absl::OkStatus();
}
} else {
auto raw_detections = cc->Inputs()
.Tag(kDetections)
.Get<std::vector<mediapipe::Detection>>();
for (const auto& detection : raw_detections) {
only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
detection, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), &xmin, &xmax, &ymin, &ymax));
}
}
}
bool only_required_found = false;
bool has_detections = true;
MP_RETURN_IF_ERROR(GetDetectionsBox(cc, &xmin, &xmax, &ymin, &ymax,
&only_required_found, &has_detections));
if (!has_detections) return absl::OkStatus();
const bool may_start_animation = (options_.us_to_first_rect() != 0) &&
(!cc->Inputs().HasTag(kAnimateZoom) ||
@ -656,7 +613,8 @@ absl::Status ContentZoomingCalculator::Process(
path_solver_zoom_->ClearHistory();
}
const bool camera_active =
is_animating || pan_state || tilt_state || zoom_state;
is_animating || ((pan_state || tilt_state || zoom_state) &&
!options_.disable_animations());
// Waiting for first rect before setting any value of the camera active flag
// so we avoid setting it to false during initialization.
if (cc->Outputs().HasTag(kCameraActive) &&
@ -666,17 +624,26 @@ absl::Status ContentZoomingCalculator::Process(
.AddPacket(MakePacket<bool>(camera_active).At(cc->InputTimestamp()));
}
// Skip the path solvers to the final destination if not animating.
const bool disable_animations =
options_.disable_animations() && path_solver_zoom_->IsInitialized();
if (disable_animations) {
MP_RETURN_IF_ERROR(path_solver_zoom_->SetState(height));
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(offset_y));
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(offset_x));
}
// Compute smoothed zoom camera path.
MP_RETURN_IF_ERROR(path_solver_zoom_->AddObservation(
height, cc->InputTimestamp().Microseconds()));
float path_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetState(&path_height));
float path_width = path_height * target_aspect_;
const float path_width = path_height * target_aspect_;
// Update pixel-per-degree value for pan/tilt.
int target_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetTargetPosition(&target_height));
int target_width = target_height * target_aspect_;
const int target_width = target_height * target_aspect_;
MP_RETURN_IF_ERROR(path_solver_pan_->UpdatePixelsPerDegree(
static_cast<float>(target_width) / kFieldOfView));
MP_RETURN_IF_ERROR(path_solver_tilt_->UpdatePixelsPerDegree(
@ -692,66 +659,16 @@ absl::Status ContentZoomingCalculator::Process(
float path_offset_y;
MP_RETURN_IF_ERROR(path_solver_tilt_->GetState(&path_offset_y));
float delta_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
int delta_width = delta_height * target_aspect_;
// Smooth centering when zooming out.
float remaining_width = target_width - path_width;
int width_space = frame_width_ - target_width;
if (abs(path_offset_x - frame_width_ / 2) >
width_space / 2 + kPixelTolerance &&
remaining_width > kPixelTolerance) {
float required_width =
abs(path_offset_x - frame_width_ / 2) - width_space / 2;
if (path_offset_x < frame_width_ / 2) {
path_offset_x += delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
} else {
path_offset_x -= delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
}
}
float remaining_height = target_height - path_height;
int height_space = frame_height_ - target_height;
if (abs(path_offset_y - frame_height_ / 2) >
height_space / 2 + kPixelTolerance &&
remaining_height > kPixelTolerance) {
float required_height =
abs(path_offset_y - frame_height_ / 2) - height_space / 2;
if (path_offset_y < frame_height_ / 2) {
path_offset_y += delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
} else {
path_offset_y -= delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
}
}
// Prevent box from extending beyond the image after camera smoothing.
if (path_offset_y - ceil(path_height / 2.0) < 0) {
path_offset_y = ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
} else if (path_offset_y + ceil(path_height / 2.0) > frame_height_) {
path_offset_y = frame_height_ - ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(path_offset_y));
}
if (path_offset_x - ceil(path_width / 2.0) < 0) {
path_offset_x = ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
} else if (path_offset_x + ceil(path_width / 2.0) > frame_width_) {
path_offset_x = frame_width_ - ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(path_offset_x));
}
// Convert to top/bottom borders to remove.
int path_top = path_offset_y - path_height / 2;
int path_bottom = frame_height_ - (path_offset_y + path_height / 2);
// Update path.
MP_RETURN_IF_ERROR(SmoothAndClampPath(target_width, target_height, path_width,
path_height, &path_offset_x,
&path_offset_y));
// Transmit result downstream to scenecroppingcalculator.
if (cc->Outputs().HasTag(kDetectedBorders)) {
// Convert to top/bottom borders to remove.
const int path_top = path_offset_y - path_height / 2;
const int path_bottom = frame_height_ - (path_offset_y + path_height / 2);
std::unique_ptr<StaticFeatures> features =
absl::make_unique<StaticFeatures>();
MakeStaticFeatures(path_top, path_bottom, frame_width_, frame_height_,
@ -798,8 +715,8 @@ absl::Status ContentZoomingCalculator::Process(
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
std::unique_ptr<mediapipe::NormalizedRect> gpu_rect =
absl::make_unique<mediapipe::NormalizedRect>();
float float_frame_width = static_cast<float>(frame_width_);
float float_frame_height = static_cast<float>(frame_height_);
const float float_frame_width = static_cast<float>(frame_width_);
const float float_frame_height = static_cast<float>(frame_height_);
if (is_animating) {
auto rect =
GetAnimationRect(frame_width, frame_height, cc->InputTimestamp());
@ -829,5 +746,130 @@ absl::Status ContentZoomingCalculator::Process(
return absl::OkStatus();
}
absl::Status ContentZoomingCalculator::SmoothAndClampPath(
int target_width, int target_height, float path_width, float path_height,
float* path_offset_x, float* path_offset_y) {
float delta_height;
MP_RETURN_IF_ERROR(path_solver_zoom_->GetDeltaState(&delta_height));
const int delta_width = delta_height * target_aspect_;
// Smooth centering when zooming out.
const float remaining_width = target_width - path_width;
const int width_space = frame_width_ - target_width;
if (abs(*path_offset_x - frame_width_ / 2) >
width_space / 2 + kPixelTolerance &&
remaining_width > kPixelTolerance) {
const float required_width =
abs(*path_offset_x - frame_width_ / 2) - width_space / 2;
if (*path_offset_x < frame_width_ / 2) {
*path_offset_x += delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
} else {
*path_offset_x -= delta_width * (required_width / remaining_width);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
}
}
const float remaining_height = target_height - path_height;
const int height_space = frame_height_ - target_height;
if (abs(*path_offset_y - frame_height_ / 2) >
height_space / 2 + kPixelTolerance &&
remaining_height > kPixelTolerance) {
const float required_height =
abs(*path_offset_y - frame_height_ / 2) - height_space / 2;
if (*path_offset_y < frame_height_ / 2) {
*path_offset_y += delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
} else {
*path_offset_y -= delta_height * (required_height / remaining_height);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
}
}
// Prevent box from extending beyond the image after camera smoothing.
if (*path_offset_y - ceil(path_height / 2.0) < 0) {
*path_offset_y = ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
} else if (*path_offset_y + ceil(path_height / 2.0) > frame_height_) {
*path_offset_y = frame_height_ - ceil(path_height / 2.0);
MP_RETURN_IF_ERROR(path_solver_tilt_->SetState(*path_offset_y));
}
if (*path_offset_x - ceil(path_width / 2.0) < 0) {
*path_offset_x = ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
} else if (*path_offset_x + ceil(path_width / 2.0) > frame_width_) {
*path_offset_x = frame_width_ - ceil(path_width / 2.0);
MP_RETURN_IF_ERROR(path_solver_pan_->SetState(*path_offset_x));
}
return absl::OkStatus();
}
absl::Status ContentZoomingCalculator::GetDetectionsBox(
mediapipe::CalculatorContext* cc, float* xmin, float* xmax, float* ymin,
float* ymax, bool* only_required_found, bool* has_detections) {
if (cc->Inputs().HasTag(kSalientRegions)) {
auto detection_set = cc->Inputs().Tag(kSalientRegions).Get<DetectionSet>();
for (const auto& region : detection_set.detections()) {
if (!region.only_required()) {
continue;
}
*only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
region, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), xmin, xmax, ymin, ymax));
}
}
if (cc->Inputs().HasTag(kDetections)) {
if (cc->Inputs().Tag(kDetections).IsEmpty()) {
if (last_only_required_detection_ == 0) {
// If no detections are available and we never had any,
// simply return the full-image rectangle as crop-rect.
if (cc->Outputs().HasTag(kCropRect)) {
auto default_rect = absl::make_unique<mediapipe::Rect>();
default_rect->set_x_center(frame_width_ / 2);
default_rect->set_y_center(frame_height_ / 2);
default_rect->set_width(frame_width_);
default_rect->set_height(frame_height_);
cc->Outputs().Tag(kCropRect).Add(default_rect.release(),
Timestamp(cc->InputTimestamp()));
}
if (cc->Outputs().HasTag(kNormalizedCropRect)) {
auto default_rect = absl::make_unique<mediapipe::NormalizedRect>();
default_rect->set_x_center(0.5);
default_rect->set_y_center(0.5);
default_rect->set_width(1.0);
default_rect->set_height(1.0);
cc->Outputs()
.Tag(kNormalizedCropRect)
.Add(default_rect.release(), Timestamp(cc->InputTimestamp()));
}
// Also provide a first crop rect: in this case a zero-sized one.
if (cc->Outputs().HasTag(kFirstCropRect)) {
cc->Outputs()
.Tag(kFirstCropRect)
.Add(new mediapipe::NormalizedRect(),
Timestamp(cc->InputTimestamp()));
}
*has_detections = false;
return absl::OkStatus();
}
} else {
auto raw_detections = cc->Inputs()
.Tag(kDetections)
.Get<std::vector<mediapipe::Detection>>();
for (const auto& detection : raw_detections) {
*only_required_found = true;
MP_RETURN_IF_ERROR(UpdateRanges(
detection, options_.detection_shift_vertical(),
options_.detection_shift_horizontal(), xmin, xmax, ymin, ymax));
}
}
}
return absl::OkStatus();
}
} // namespace autoflip
} // namespace mediapipe
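
Note: the final clamping step in SmoothAndClampPath keeps the crop center at least half the crop size away from every frame edge. An equivalent, hypothetical formulation with std::clamp (assuming <algorithm> and <cmath>, and that the crop never exceeds the frame); the real code also forwards the clamped values to the pan/tilt path solvers via SetState.

    const float half_h = std::ceil(path_height / 2.0f);
    const float half_w = std::ceil(path_width / 2.0f);
    path_offset_y = std::clamp(path_offset_y, half_h, frame_height_ - half_h);
    path_offset_x = std::clamp(path_offset_x, half_w, frame_width_ - half_w);
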

View File

@ -19,7 +19,7 @@ package mediapipe.autoflip;
import "mediapipe/examples/desktop/autoflip/quality/kinematic_path_solver.proto";
import "mediapipe/framework/calculator.proto";
// NextTag: 18
// NextTag: 19
message ContentZoomingCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional ContentZoomingCalculatorOptions ext = 313091992;
@ -71,6 +71,12 @@ message ContentZoomingCalculatorOptions {
// us_to_first_rect time budget.
optional int64 us_to_first_rect_delay = 16 [default = 0];
// When true, this flag disables animating camera motions,
// and cuts directly to final target position.
// Does not apply to the first instance (first detection will still animate).
// Use "ANIMATE_ZOOM" input stream to control the first animation.
optional bool disable_animations = 18;
// Deprecated parameters
optional KinematicOptions kinematic_options = 2 [deprecated = true];
optional int64 min_motion_to_reframe = 4 [deprecated = true];
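
Note: a hypothetical way to set the new flag from C++, assuming the generated proto setter. With it enabled, only the very first detection animates (still controllable through the ANIMATE_ZOOM input stream) and every later retarget cuts directly to the final crop.

    mediapipe::autoflip::ContentZoomingCalculatorOptions options;
    options.set_disable_animations(true);
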

View File

@ -56,7 +56,7 @@ constexpr char kRegionsTag[] = "REGIONS";
constexpr char kDetectionsTag[] = "DETECTIONS";
// Converts an object detection to an autoflip SignalType. Returns true if the
// std::string label has a autoflip label.
// string label has an autoflip label.
bool MatchType(const std::string& label, SignalType* type) {
if (label == "person") {
type->set_standard(SignalType::HUMAN);

View File

@ -182,7 +182,7 @@ namespace {
absl::Status ParseAspectRatioString(const std::string& aspect_ratio_string,
double* aspect_ratio) {
std::string error_msg =
"Aspect ratio std::string must be in the format of 'width:height', e.g. "
"Aspect ratio string must be in the format of 'width:height', e.g. "
"'1:1' or '5:4', your input was " +
aspect_ratio_string;
auto pos = aspect_ratio_string.find(':');

View File

@ -4,6 +4,7 @@ constexpr float kMinVelocity = 0.5;
namespace mediapipe {
namespace autoflip {
namespace {
int Median(const std::deque<std::pair<uint64, int>>& positions_raw) {
std::deque<int> positions;
@ -16,6 +17,7 @@ int Median(const std::deque<std::pair<uint64, int>>& positions_raw) {
return positions[n];
}
} // namespace
bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) {
if (options_.has_min_motion_to_reframe()) {
return abs(delta_degs) < options_.min_motion_to_reframe();
@ -25,7 +27,9 @@ bool KinematicPathSolver::IsMotionTooSmall(double delta_degs) {
return abs(delta_degs) < options_.min_motion_to_reframe_lower();
}
}
void KinematicPathSolver::ClearHistory() { raw_positions_at_time_.clear(); }
absl::Status KinematicPathSolver::PredictMotionState(int position,
const uint64 time_us,
bool* state) {
@ -48,6 +52,9 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
}
int filtered_position = Median(raw_positions_at_time_copy);
filtered_position =
std::clamp(filtered_position, min_location_, max_location_);
double delta_degs =
(filtered_position - current_position_px_) / pixels_per_degree_;
@ -59,6 +66,9 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
// If the motion is smaller than the reframe_window and camera is moving,
// don't use the update.
*state = false;
} else if (prior_position_px_ == current_position_px_ && motion_state_) {
// Camera isn't actually moving. Likely face is past bounds.
*state = false;
} else {
// Apply new position, plus the reframe window size.
*state = true;
@ -66,6 +76,7 @@ absl::Status KinematicPathSolver::PredictMotionState(int position,
return absl::OkStatus();
}
absl::Status KinematicPathSolver::AddObservation(int position,
const uint64 time_us) {
if (!initialized_) {
@ -181,18 +192,22 @@ absl::Status KinematicPathSolver::AddObservation(int position,
}
// Time and position updates.
double delta_t = (time_us - current_time_) / 1000000.0;
double delta_t_sec = (time_us - current_time_) / 1000000.0;
if (options_.max_delta_time_sec() > 0) {
// If updates are very infrequent, then limit the max time difference.
delta_t_sec = fmin(delta_t_sec, options_.max_delta_time_sec());
}
// Time since last state/prediction update, smoothed by
// mean_period_update_rate.
if (mean_delta_t_ < 0) {
mean_delta_t_ = delta_t;
mean_delta_t_ = delta_t_sec;
} else {
mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
delta_t * options_.mean_period_update_rate();
delta_t_sec * options_.mean_period_update_rate();
}
// Observed velocity and then weighted update of this velocity.
double observed_velocity = delta_degs / delta_t;
// Observed velocity and then weighted update of this velocity (deg/sec).
double observed_velocity = delta_degs / delta_t_sec;
double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(),
options_.max_update_rate());
double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
@ -253,7 +268,8 @@ absl::Status KinematicPathSolver::GetDeltaState(float* delta_position) {
absl::Status KinematicPathSolver::SetState(const float position) {
RET_CHECK(initialized_) << "SetState called before first observation added.";
current_position_px_ = position;
current_position_px_ = std::clamp(position, static_cast<float>(min_location_),
static_cast<float>(max_location_));
return absl::OkStatus();
}
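
Note: a compact sketch of the AddObservation changes above, written with standalone variables rather than the class members. The elapsed time is capped by max_delta_time_sec before it feeds the exponentially smoothed period and the velocity estimate, so a long gap between observations cannot produce a huge apparent velocity.

    double delta_t_sec = (time_us - current_time_us) / 1e6;
    if (max_delta_time_sec > 0) delta_t_sec = std::fmin(delta_t_sec, max_delta_time_sec);
    mean_delta_t = mean_delta_t < 0
                       ? delta_t_sec
                       : mean_delta_t * (1 - mean_period_update_rate) +
                             delta_t_sec * mean_period_update_rate;
    const double observed_velocity = delta_degs / delta_t_sec;  // deg/sec
    const double update_rate =
        std::min(mean_delta_t / update_rate_seconds, max_update_rate);
    const double updated_velocity =
        current_velocity_deg_per_s * (1 - update_rate) + observed_velocity * update_rate;
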

View File

@ -71,6 +71,8 @@ class KinematicPathSolver {
// Provides the change in position from last state.
absl::Status GetDeltaState(float* delta_position);
bool IsInitialized() { return initialized_; }
private:
// Tuning options.
KinematicOptions options_;

View File

@ -31,6 +31,9 @@ message KinematicOptions {
optional int64 filtering_time_window_us = 7 [default = 0];
// Weighted update of average period, used for motion updates.
optional float mean_period_update_rate = 8 [default = 0.25];
// When set, caps the maximum time difference (seconds) calculated between new
// updates/observations. Useful when updates come very infrequently.
optional double max_delta_time_sec = 13;
// Scale factor for max velocity, to be multiplied by the distance from center
// in degrees. Cannot be used with max_velocity and must be used with
// max_velocity_shift.

View File

@ -419,6 +419,13 @@ TEST(KinematicPathSolverTest, PassSetPosition) {
MP_ASSERT_OK(solver.SetState(400));
MP_ASSERT_OK(solver.GetState(&state));
EXPECT_FLOAT_EQ(state, 400);
// Expect to stay in bounds.
MP_ASSERT_OK(solver.SetState(600));
MP_ASSERT_OK(solver.GetState(&state));
EXPECT_FLOAT_EQ(state, 500);
MP_ASSERT_OK(solver.SetState(-100));
MP_ASSERT_OK(solver.GetState(&state));
EXPECT_FLOAT_EQ(state, 0);
}
TEST(KinematicPathSolverTest, PassBorderTest) {
KinematicOptions options;

View File

@ -83,7 +83,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem(
const double in, const double out, Problem* problem, double* a, double* b,
double* c, double* d, double* k) {
// Creating a cost function, with 1D residual and 5 1D parameter blocks. This
// is what the "1, 1, 1, 1, 1, 1" std::string below means.
// is what the "1, 1, 1, 1, 1, 1" string below means.
CostFunction* cost_function =
new AutoDiffCostFunction<PolynomialResidual, 1, 1, 1, 1, 1, 1>(
new PolynomialResidual(in, out));

View File

@ -55,7 +55,8 @@ class SceneCameraMotionAnalyzer {
scene_camera_motion_analyzer_options)
: options_(scene_camera_motion_analyzer_options),
time_since_last_salient_region_us_(0),
has_solid_color_background_(false) {}
has_solid_color_background_(false),
total_scene_frames_(0) {}
~SceneCameraMotionAnalyzer() {}

View File

@ -44,7 +44,7 @@ absl::Status PrintHelloWorld() {
ASSIGN_OR_RETURN(OutputStreamPoller poller,
graph.AddOutputStreamPoller("out"));
MP_RETURN_IF_ERROR(graph.StartRun({}));
// Give 10 input packets that contains the same std::string "Hello World!".
// Give 10 input packets that contains the same string "Hello World!".
for (int i = 0; i < 10; ++i) {
MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
"in", MakePacket<std::string>("Hello World!").At(Timestamp(i))));
@ -52,7 +52,7 @@ absl::Status PrintHelloWorld() {
// Close the input stream "in".
MP_RETURN_IF_ERROR(graph.CloseInputStream("in"));
mediapipe::Packet packet;
// Get the output packets std::string.
// Get the output packets string.
while (poller.Next(&packet)) {
LOG(INFO) << packet.Get<std::string>();
}

View File

@ -72,6 +72,7 @@ objc_library(
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
],
features = ["-layering_check"],
sdk_frameworks = [
"AVFoundation",
"CoreGraphics",

View File

@ -58,6 +58,7 @@ objc_library(
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/holistic_landmark:hand_recrop.tflite",
"//mediapipe/modules/pose_detection:pose_detection.tflite",

View File

@ -150,6 +150,13 @@ mediapipe_proto_library(
deps = ["//mediapipe/framework:mediapipe_options_proto"],
)
config_setting(
name = "android_no_jni",
define_values = {"MEDIAPIPE_NO_JNI": "1"},
values = {"crosstool_top": "//external:android/crosstool"},
visibility = ["//visibility:public"],
)
cc_library(
name = "calculator_base",
srcs = ["calculator_base.cc"],
@ -712,6 +719,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"@com_google_absl//absl/memory",
"@com_google_absl//absl/synchronization",
],
)
@ -916,15 +924,19 @@ cc_library(
":packet",
":packet_set",
":type_map",
"//mediapipe/framework/deps:no_destructor",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:map_util",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:source_location",
"//mediapipe/framework/port:status",
"//mediapipe/framework/tool:status_util",
"//mediapipe/framework/tool:type_util",
"//mediapipe/framework/tool:validate_name",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
"@com_google_absl//absl/types:variant",
],
)

View File

@ -134,6 +134,7 @@ cc_test(
deps = [
":packet",
"//mediapipe/framework/port:gtest_main",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
)

View File

@ -313,8 +313,8 @@ template <class Calc>
class Node : public NodeBase {
public:
Node() : NodeBase(Calc::kCalculatorName) {}
// Overrides the built-in calculator type std::string with the provided
// argument. Can be used to create nodes from pure interfaces.
// Overrides the built-in calculator type string with the provided argument.
// Can be used to create nodes from pure interfaces.
// TODO: only use this for pure interfaces
Node(const std::string& type_override) : NodeBase(type_override) {}
@ -377,6 +377,29 @@ class PacketGenerator {
return *options_.MutableExtension(T::ext);
}
template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
using PayloadT =
typename PortCommon<B, T, kIsOptional, kIsMultiple>::PayloadT;
if constexpr (std::is_same_v<B, SideOutputBase>) {
auto* base = &out_sides_[port.Tag()];
if constexpr (kIsMultiple) {
return MultiSideSource<PayloadT>(base);
} else {
return SideSource<PayloadT>(base);
}
} else if constexpr (std::is_same_v<B, SideInputBase>) {
auto* base = &in_sides_[port.Tag()];
if constexpr (kIsMultiple) {
return MultiSideDestination<PayloadT>(base);
} else {
return SideDestination<PayloadT>(base);
}
} else {
static_assert(dependent_false<B>::value, "Type not supported.");
}
}
private:
std::string type_;
TagIndexMap<DestinationBase> in_sides_;
@ -402,7 +425,7 @@ class Graph {
}
// Creates a node of a specific type. Should be used for pure interfaces,
// which do not have a built-in type std::string.
// which do not have a built-in type string.
template <class Calc>
Node<Calc>& AddNode(const std::string& type) {
auto node = std::make_unique<Node<Calc>>(type);

View File

@ -6,8 +6,8 @@
namespace mediapipe {
namespace api2 {
// This class stores a constant std::string that can be inspected at compile
// time in constexpr code.
// This class stores a constant string that can be inspected at compile time
// in constexpr code.
class const_str {
public:
constexpr const_str(std::size_t size, const char* data)

Some files were not shown because too many files have changed in this diff