Project import generated by Copybara.

GitOrigin-RevId: 33adfdf31f3a5cbf9edc07ee1ea583e95080bdc5
This commit is contained in:
MediaPipe Team 2021-06-24 14:10:25 -07:00 committed by chuoling
parent b544a314b3
commit 139237092f
151 changed files with 4023 additions and 1001 deletions

View File

@ -8,6 +8,7 @@ include README.md
include requirements.txt
recursive-include mediapipe/modules *.tflite *.txt *.binarypb
exclude mediapipe/modules/face_detection/face_detection_full_range.tflite
exclude mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite
exclude mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite
exclude mediapipe/modules/objectron/object_detection_3d_sneakers.tflite

View File

@ -55,46 +55,22 @@ See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
## MediaPipe in Python
MediaPipe offers customizable Python solutions as a prebuilt Python package on
[PyPI](https://pypi.org/project/mediapipe/), which can be installed simply with
`pip install mediapipe`. It also provides tools for users to build their own
solutions. Please see
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python)
for more info.
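For example, here is a minimal sketch of running Face Detection on a single
image with the prebuilt package (the image path is an illustrative assumption;
the API shown follows the Python examples in the solution docs):
```python
import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection

# Read your own image; MediaPipe expects RGB while OpenCV loads BGR.
image = cv2.imread('example.jpg')
with mp_face_detection.FaceDetection(min_detection_confidence=0.5) as face_detection:
  results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

# Each detection carries confidence score(s) and a relative bounding box.
if results.detections:
  for detection in results.detections:
    print(detection.score, detection.location_data.relative_bounding_box)
```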
## MediaPipe on the Web
MediaPipe on the Web is an effort to run the same ML solutions built for mobile
and desktop in web browsers as well. The official API is under construction, but
the core technology has been proven effective. Please see
[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
in Google Developers Blog for details.
You can use the following links to load a demo in the MediaPipe Visualizer, and
then click the "Runner" icon in the top bar as shown below. The demos use your
webcam video as input, which is processed locally in real time and never leaves
your device.
![visualizer_runner](docs/images/visualizer_runner.png)
* [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection)
* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking)
* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
* [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking)
* [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection)
* [MediaPipe Pose](https://viz.mediapipe.dev/demo/pose_tracking)
* [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation)
## Getting started
Learn how to [install](https://google.github.io/mediapipe/getting_started/install)
MediaPipe and
[build example applications](https://google.github.io/mediapipe/getting_started/building_examples),
and start exploring our ready-to-use
[solutions](https://google.github.io/mediapipe/solutions/solutions) that you can
further extend and customize.
To start using MediaPipe
[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few
lines of code, see example code and demos in
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and
[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript).
To use MediaPipe in C++, Android and iOS, which allow further customization of
the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as
building your own, learn how to
[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and
start building example applications in
[C++](https://google.github.io/mediapipe/getting_started/cpp),
[Android](https://google.github.io/mediapipe/getting_started/android) and
[iOS](https://google.github.io/mediapipe/getting_started/ios).
The source code is hosted in the
[MediaPipe Github repository](https://github.com/google/mediapipe), and you can

View File

@ -351,8 +351,8 @@ maven_install(
"androidx.test.espresso:espresso-core:3.1.1",
"com.github.bumptech.glide:glide:4.11.0",
"com.google.android.material:material:aar:1.0.0-rc01",
"com.google.auto.value:auto-value:1.6.4",
"com.google.auto.value:auto-value-annotations:1.6.4",
"com.google.auto.value:auto-value:1.8.1",
"com.google.auto.value:auto-value-annotations:1.8.1",
"com.google.code.findbugs:jsr305:3.0.2",
"com.google.flogger:flogger-system-backend:0.3.1",
"com.google.flogger:flogger:0.3.1",

View File

@ -92,12 +92,12 @@ each project.
and copy
[the binary graph](https://github.com/google/mediapipe/blob/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD#L41)
and
[the face detection tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite).
[the face detection tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range.tflite).
```bash
bazel build -c opt mediapipe/graphs/face_detection:face_detection_mobile_gpu_binary_graph
cp bazel-bin/mediapipe/graphs/face_detection/face_detection_mobile_gpu.binarypb /path/to/your/app/src/main/assets/
cp mediapipe/modules/face_detection/face_detection_front.tflite /path/to/your/app/src/main/assets/
cp mediapipe/modules/face_detection/face_detection_short_range.tflite /path/to/your/app/src/main/assets/
```
![Screenshot](../images/mobile/assets_location.png)
@ -117,7 +117,6 @@ each project.
implementation 'com.google.flogger:flogger-system-backend:0.3.1'
implementation 'com.google.code.findbugs:jsr305:3.0.2'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library
def camerax_version = "1.0.0-beta10"
@ -125,7 +124,7 @@ each project.
implementation "androidx.camera:camera-camera2:$camerax_version"
implementation "androidx.camera:camera-lifecycle:$camerax_version"
// AutoValue
def auto_value_version = "1.6.4"
def auto_value_version = "1.8.1"
implementation "com.google.auto.value:auto-value-annotations:$auto_value_version"
annotationProcessor "com.google.auto.value:auto-value:$auto_value_version"
}

Binary file not shown.

View File

@ -55,46 +55,22 @@ See also
[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
for ML models released in MediaPipe.
## MediaPipe in Python
MediaPipe offers customizable Python solutions as a prebuilt Python package on
[PyPI](https://pypi.org/project/mediapipe/), which can be installed simply with
`pip install mediapipe`. It also provides tools for users to build their own
solutions. Please see
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python)
for more info.
## MediaPipe on the Web
MediaPipe on the Web is an effort to run the same ML solutions built for mobile
and desktop in web browsers as well. The official API is under construction, but
the core technology has been proven effective. Please see
[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
in Google Developers Blog for details.
You can use the following links to load a demo in the MediaPipe Visualizer, and
then click the "Runner" icon in the top bar as shown below. The demos use your
webcam video as input, which is processed locally in real time and never leaves
your device.
![visualizer_runner](images/visualizer_runner.png)
* [MediaPipe Face Detection](https://viz.mediapipe.dev/demo/face_detection)
* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking)
* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
* [MediaPipe Hands](https://viz.mediapipe.dev/demo/hand_tracking)
* [MediaPipe Hands (palm/hand detection only)](https://viz.mediapipe.dev/demo/hand_detection)
* [MediaPipe Pose](https://viz.mediapipe.dev/demo/pose_tracking)
* [MediaPipe Hair Segmentation](https://viz.mediapipe.dev/demo/hair_segmentation)
## Getting started
Learn how to [install](https://google.github.io/mediapipe/getting_started/install)
MediaPipe and
[build example applications](https://google.github.io/mediapipe/getting_started/building_examples),
and start exploring our ready-to-use
[solutions](https://google.github.io/mediapipe/solutions/solutions) that you can
further extend and customize.
To start using MediaPipe
[solutions](https://google.github.io/mediapipe/solutions/solutions) with only a few
lines of code, see example code and demos in
[MediaPipe in Python](https://google.github.io/mediapipe/getting_started/python) and
[MediaPipe in JavaScript](https://google.github.io/mediapipe/getting_started/javascript).
To use MediaPipe in C++, Android and iOS, which allow further customization of
the [solutions](https://google.github.io/mediapipe/solutions/solutions) as well as
building your own, learn how to
[install](https://google.github.io/mediapipe/getting_started/install) MediaPipe and
start building example applications in
[C++](https://google.github.io/mediapipe/getting_started/cpp),
[Android](https://google.github.io/mediapipe/getting_started/android) and
[iOS](https://google.github.io/mediapipe/getting_started/ios).
The source code is hosted in the
[MediaPipe Github repository](https://github.com/google/mediapipe), and you can

View File

@ -45,6 +45,15 @@ section.
Naming style and availability may differ slightly across platforms/languages.
#### model_selection
An integer index `0` or `1`. Use `0` to select a short-range model that works
best for faces within 2 meters from the camera, and `1` for a full-range model
that works best for faces within 5 meters. For the full-range option, a sparse
model is used for its improved inference speed. Please refer to the
[model cards](./models.md#face_detection) for details. Defaults to `0` if not
specified.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the
@ -72,6 +81,7 @@ install MediaPipe Python package, then learn more in the companion
Supported configuration options:
* [model_selection](#model_selection)
* [min_detection_confidence](#min_detection_confidence)
```python
@ -83,7 +93,7 @@ mp_drawing = mp.solutions.drawing_utils
# For static images:
IMAGE_FILES = []
with mp_face_detection.FaceDetection(
min_detection_confidence=0.5) as face_detection:
model_selection=1, min_detection_confidence=0.5) as face_detection:
for idx, file in enumerate(IMAGE_FILES):
image = cv2.imread(file)
# Convert the BGR image to RGB and process it with MediaPipe Face Detection.
@ -103,7 +113,7 @@ with mp_face_detection.FaceDetection(
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_face_detection.FaceDetection(
min_detection_confidence=0.5) as face_detection:
model_selection=0, min_detection_confidence=0.5) as face_detection:
while cap.isOpened():
success, image = cap.read()
if not success:
@ -139,6 +149,7 @@ and the following usage example.
Supported configuration options:
* [modelSelection](#model_selection)
* [minDetectionConfidence](#min_detection_confidence)
```html
@ -189,6 +200,7 @@ const faceDetection = new FaceDetection({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/face_detection@0.0/${file}`;
}});
faceDetection.setOptions({
modelSelection: 0,
minDetectionConfidence: 0.5
});
faceDetection.onResults(onResults);
@ -255,10 +267,6 @@ same configuration as the GPU pipeline, runs entirely on CPU.
* Target:
[`mediapipe/examples/desktop/face_detection:face_detection_gpu`](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/face_detection/BUILD)
### Web
Please refer to [these instructions](../index.md#mediapipe-on-the-web).
### Coral
Please refer to

View File

@ -69,7 +69,7 @@ and renders using a dedicated
The
[face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
internally uses a
[face_detection_subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)
[face_detection_subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)
from the
[face detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection).

View File

@ -51,7 +51,14 @@ to visualize its associated subgraphs, please see
### Web
Please refer to [these instructions](../index.md#mediapipe-on-the-web).
Use [this link](https://viz.mediapipe.dev/demo/hair_segmentation) to load a demo
in the MediaPipe Visualizer, and then click the "Runner" icon in the top bar as
shown below. The demos use your webcam video as input, which is processed
locally in real time and never leaves your device. Please see
[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
in Google Developers Blog for details.
![visualizer_runner](../images/visualizer_runner.png)
## Resources

View File

@ -176,6 +176,16 @@ A list of pose landmarks. Each landmark consists of the following:
* `visibility`: A value in `[0.0, 1.0]` indicating the likelihood of the
landmark being visible (present and not occluded) in the image.
#### pose_world_landmarks
Another list of pose landmarks in world coordinates. Each landmark consists of
the following:
* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
center between hips.
* `visibility`: Identical to that defined in the corresponding
[pose_landmarks](#pose_landmarks).
#### face_landmarks
A list of 468 face landmarks. Each landmark consists of `x`, `y` and `z`. `x`
@ -245,6 +255,9 @@ with mp_holistic.Holistic(
mp_drawing.draw_landmarks(
annotated_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# Plot pose world landmarks.
mp_drawing.plot_landmarks(
results.pose_world_landmarks, mp_holistic.POSE_CONNECTIONS)
# For webcam input:
cap = cv2.VideoCapture(0)

View File

@ -69,7 +69,7 @@ and renders using a dedicated
The
[face landmark subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)
internally uses a
[face detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)
[face detection subgraph](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)
from the
[face detection module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection).
@ -193,7 +193,17 @@ on how to build MediaPipe examples.
### Web
Please refer to [these instructions](../index.md#mediapipe-on-the-web).
You can use the following links to load a demo in the MediaPipe Visualizer, and
then click the "Runner" icon in the top bar as shown below. The demos use your
webcam video as input, which is processed locally in real time and never leaves
your device. Please see
[MediaPipe on the Web](https://developers.googleblog.com/2020/01/mediapipe-on-web.html)
in Google Developers Blog for details.
![visualizer_runner](../images/visualizer_runner.png)
* [MediaPipe Iris](https://viz.mediapipe.dev/demo/iris_tracking)
* [MediaPipe Iris: Depth-from-Iris](https://viz.mediapipe.dev/demo/iris_depth)
## Resources

View File

@ -14,17 +14,27 @@ nav_order: 30
### [Face Detection](https://google.github.io/mediapipe/solutions/face_detection)
* Face detection model for front-facing/selfie camera:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front.tflite),
* Short-range model (best for faces within 2 meters from the camera):
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range.tflite),
[TFLite model quantized for EdgeTPU/Coral](https://github.com/google/mediapipe/tree/master/mediapipe/examples/coral/models/face-detector-quantized_edgetpu.tflite),
[Model card](https://mediapipe.page.link/blazeface-mc)
* Face detection model for back-facing camera:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_back.tflite),
* Full-range model (dense, best for faces within 5 meters from the camera):
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range.tflite),
[Model card](https://mediapipe.page.link/blazeface-back-mc)
* Face detection model for back-facing camera (sparse):
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_back_sparse.tflite),
* Full-range model (sparse, best for faces within 5 meters from the camera):
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite),
[Model card](https://mediapipe.page.link/blazeface-back-sparse-mc)
The full-range dense and sparse models have the same quality in terms of
[F-score](https://en.wikipedia.org/wiki/F-score), but differ in underlying
metrics: the dense model is slightly better in
[Recall](https://en.wikipedia.org/wiki/Precision_and_recall), whereas the sparse
model outperforms the dense one in
[Precision](https://en.wikipedia.org/wiki/Precision_and_recall). Speed-wise, the
sparse model is ~30% faster when executing on CPU via
[XNNPACK](https://github.com/google/XNNPACK), whereas on GPU the two models
demonstrate comparable latencies. Depending on your application, you may prefer
one over the other.
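(For reference, the F-score combines the two as their harmonic mean:
F = 2 * Precision * Recall / (Precision + Recall).)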
### [Face Mesh](https://google.github.io/mediapipe/solutions/face_mesh)

View File

@ -194,10 +194,23 @@ A list of pose landmarks. Each landmark consists of the following:
* `z`: Represents the landmark depth with the depth at the midpoint of hips
being the origin, and the smaller the value the closer the landmark is to
the camera. The magnitude of `z` uses roughly the same scale as `x`.
* `visibility`: A value in `[0.0, 1.0]` indicating the likelihood of the
landmark being visible (present and not occluded) in the image.
#### pose_world_landmarks
*Fig 5. Example of MediaPipe Pose real-world 3D coordinates.* |
:-----------------------------------------------------------: |
<video autoplay muted loop preload style="height: auto; width: 480px"><source src="../images/mobile/pose_world_landmarks.mp4" type="video/mp4"></video> |
Another list of pose landmarks in world coordinates. Each landmark consists of
the following:
* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
center between hips.
* `visibility`: Identical to that defined in the corresponding
[pose_landmarks](#pose_landmarks).
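As a rough illustration of what the metric world coordinates enable (the helper
below is an illustrative sketch, not part of the solution API), distances
between landmarks can be measured directly in meters:
```python
import math
import mediapipe as mp

mp_pose = mp.solutions.pose

def shoulder_width_meters(results):
  """Approximate shoulder width, in meters, from pose world landmarks."""
  if not results.pose_world_landmarks:
    return None
  lm = results.pose_world_landmarks.landmark
  left = lm[mp_pose.PoseLandmark.LEFT_SHOULDER]
  right = lm[mp_pose.PoseLandmark.RIGHT_SHOULDER]
  return math.dist((left.x, left.y, left.z), (right.x, right.y, right.z))
```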
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
@ -242,6 +255,9 @@ with mp_pose.Pose(
mp_drawing.draw_landmarks(
annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# Plot pose world landmarks.
mp_drawing.plot_landmarks(
results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)
# For webcam input:
cap = cv2.VideoCapture(0)
@ -294,6 +310,7 @@ Supported configuration options:
<meta charset="utf-8">
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/control_utils_3d.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/@mediapipe/pose/pose.js" crossorigin="anonymous"></script>
</head>
@ -312,8 +329,15 @@ Supported configuration options:
const videoElement = document.getElementsByClassName('input_video')[0];
const canvasElement = document.getElementsByClassName('output_canvas')[0];
const canvasCtx = canvasElement.getContext('2d');
const landmarkContainer = document.getElementsByClassName('landmark-grid-container')[0];
const grid = new LandmarkGrid(landmarkContainer);
function onResults(results) {
if (!results.poseLandmarks) {
grid.updateLandmarks([]);
return;
}
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(
@ -323,6 +347,8 @@ function onResults(results) {
drawLandmarks(canvasCtx, results.poseLandmarks,
{color: '#FF0000', lineWidth: 2});
canvasCtx.restore();
grid.updateLandmarks(results.poseWorldLandmarks);
}
const pose = new Pose({locateFile: (file) => {

View File

@ -933,8 +933,8 @@ cc_test(
)
cc_library(
name = "split_normalized_landmark_list_calculator",
srcs = ["split_normalized_landmark_list_calculator.cc"],
name = "split_landmarks_calculator",
srcs = ["split_landmarks_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":split_vector_calculator_cc_proto",
@ -948,10 +948,10 @@ cc_library(
)
cc_test(
name = "split_normalized_landmark_list_calculator_test",
srcs = ["split_normalized_landmark_list_calculator_test.cc"],
name = "split_landmarks_calculator_test",
srcs = ["split_landmarks_calculator_test.cc"],
deps = [
":split_normalized_landmark_list_calculator",
":split_landmarks_calculator",
":split_vector_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",

View File

@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT
#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_ // NOLINT
#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_ // NOLINT
#include "mediapipe/calculators/core/split_vector_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
@ -24,29 +24,30 @@
namespace mediapipe {
// Splits an input packet with NormalizedLandmarkList into
// multiple NormalizedLandmarkList output packets using the [begin, end) ranges
// Splits an input packet with LandmarkListType into
// multiple LandmarkListType output packets using the [begin, end) ranges
// specified in SplitVectorCalculatorOptions. If the option "element_only" is
// set to true, all ranges should be of size 1 and all outputs will be elements
// of type NormalizedLandmark. If "element_only" is false, ranges can be
// non-zero in size and all outputs will be of type NormalizedLandmarkList.
// of type LandmarkType. If "element_only" is false, ranges can be
// non-zero in size and all outputs will be of type LandmarkListType.
// If the option "combine_outputs" is set to true, only one output stream can be
// specified and all ranges of elements will be combined into one
// NormalizedLandmarkList.
class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
// LandmarkListType.
template <typename LandmarkType, typename LandmarkListType>
class SplitLandmarksCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
RET_CHECK(cc->Inputs().NumEntries() == 1);
RET_CHECK(cc->Outputs().NumEntries() != 0);
cc->Inputs().Index(0).Set<NormalizedLandmarkList>();
cc->Inputs().Index(0).Set<LandmarkListType>();
const auto& options =
cc->Options<::mediapipe::SplitVectorCalculatorOptions>();
if (options.combine_outputs()) {
RET_CHECK_EQ(cc->Outputs().NumEntries(), 1);
cc->Outputs().Index(0).Set<NormalizedLandmarkList>();
cc->Outputs().Index(0).Set<LandmarkListType>();
for (int i = 0; i < options.ranges_size() - 1; ++i) {
for (int j = i + 1; j < options.ranges_size(); ++j) {
const auto& range_0 = options.ranges(i);
@ -81,9 +82,9 @@ class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
return absl::InvalidArgumentError(
"Since element_only is true, all ranges should be of size 1.");
}
cc->Outputs().Index(i).Set<NormalizedLandmark>();
cc->Outputs().Index(i).Set<LandmarkType>();
} else {
cc->Outputs().Index(i).Set<NormalizedLandmarkList>();
cc->Outputs().Index(i).Set<LandmarkListType>();
}
}
}
@ -110,40 +111,39 @@ class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
}
absl::Status Process(CalculatorContext* cc) override {
const NormalizedLandmarkList& input =
cc->Inputs().Index(0).Get<NormalizedLandmarkList>();
const LandmarkListType& input =
cc->Inputs().Index(0).Get<LandmarkListType>();
RET_CHECK_GE(input.landmark_size(), max_range_end_)
<< "Max range end " << max_range_end_ << " exceeds landmarks size "
<< input.landmark_size();
if (combine_outputs_) {
NormalizedLandmarkList output;
LandmarkListType output;
for (int i = 0; i < ranges_.size(); ++i) {
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
const NormalizedLandmark& input_landmark = input.landmark(j);
const LandmarkType& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
}
}
RET_CHECK_EQ(output.landmark_size(), total_elements_);
cc->Outputs().Index(0).AddPacket(
MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp()));
MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
} else {
if (element_only_) {
for (int i = 0; i < ranges_.size(); ++i) {
cc->Outputs().Index(i).AddPacket(
MakePacket<NormalizedLandmark>(input.landmark(ranges_[i].first))
MakePacket<LandmarkType>(input.landmark(ranges_[i].first))
.At(cc->InputTimestamp()));
}
} else {
for (int i = 0; i < ranges_.size(); ++i) {
NormalizedLandmarkList output;
LandmarkListType output;
for (int j = ranges_[i].first; j < ranges_[i].second; ++j) {
const NormalizedLandmark& input_landmark = input.landmark(j);
const LandmarkType& input_landmark = input.landmark(j);
*output.add_landmark() = input_landmark;
}
cc->Outputs().Index(i).AddPacket(
MakePacket<NormalizedLandmarkList>(output).At(
cc->InputTimestamp()));
MakePacket<LandmarkListType>(output).At(cc->InputTimestamp()));
}
}
}
@ -159,9 +159,15 @@ class SplitNormalizedLandmarkListCalculator : public CalculatorBase {
bool combine_outputs_ = false;
};
typedef SplitLandmarksCalculator<NormalizedLandmark, NormalizedLandmarkList>
SplitNormalizedLandmarkListCalculator;
REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator);
typedef SplitLandmarksCalculator<Landmark, LandmarkList>
SplitLandmarkListCalculator;
REGISTER_CALCULATOR(SplitLandmarkListCalculator);
} // namespace mediapipe
// NOLINTNEXTLINE
#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_
#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_LANDMARKS_CALCULATOR_H_
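To make the range semantics described in the comments concrete, here is a rough
Python sketch of the same splitting behavior over a plain list (illustrative
only; it mirrors the documented options and is not the calculator API):
```python
def split_landmarks(landmarks, ranges, element_only=False, combine_outputs=False):
  """Split a landmark list into outputs using [begin, end) ranges."""
  if combine_outputs:
    # Single output concatenating all requested ranges.
    return [lm for begin, end in ranges for lm in landmarks[begin:end]]
  if element_only:
    # All ranges must have size 1; each output is a single element.
    assert all(end - begin == 1 for begin, end in ranges)
    return [landmarks[begin] for begin, _ in ranges]
  # One output list per range.
  return [landmarks[begin:end] for begin, end in ranges]

# split_landmarks(list(range(10)), [(0, 2), (5, 6)]) -> [[0, 1], [5]]
```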

View File

@ -80,6 +80,16 @@ mediapipe_proto_library(
],
)
mediapipe_proto_library(
name = "segmentation_smoothing_calculator_proto",
srcs = ["segmentation_smoothing_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "color_convert_calculator",
srcs = ["color_convert_calculator.cc"],
@ -602,3 +612,52 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto",
],
)
cc_library(
name = "segmentation_smoothing_calculator",
srcs = ["segmentation_smoothing_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":segmentation_smoothing_calculator_cc_proto",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:image",
"//mediapipe/framework/formats:image_opencv",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:vector",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gl_simple_shaders",
"//mediapipe/gpu:gl_quad_renderer",
"//mediapipe/gpu:shader_util",
],
}),
alwayslink = 1,
)
cc_test(
name = "segmentation_smoothing_calculator_test",
srcs = ["segmentation_smoothing_calculator_test.cc"],
deps = [
":image_clone_calculator",
":image_clone_calculator_cc_proto",
":segmentation_smoothing_calculator",
":segmentation_smoothing_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_opencv",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:parse_text_proto",
],
)

View File

@ -0,0 +1,429 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <memory>
#include "mediapipe/calculators/image/segmentation_smoothing_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/image.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/vector.h"
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gl_simple_shaders.h"
#include "mediapipe/gpu/shader_util.h"
#endif // !MEDIAPIPE_DISABLE_GPU
namespace mediapipe {
namespace {
constexpr char kCurrentMaskTag[] = "MASK";
constexpr char kPreviousMaskTag[] = "MASK_PREVIOUS";
constexpr char kOutputMaskTag[] = "MASK_SMOOTHED";
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
} // namespace
// A calculator for mixing two segmentation masks together,
// based on an uncertainty probability estimate.
//
// Inputs:
// MASK - Image containing the new/current mask.
// [ImageFormat::VEC32F1, or
// GpuBufferFormat::kBGRA32/kRGB24/kGrayHalf16/kGrayFloat32]
// MASK_PREVIOUS - Image containing previous mask.
// [Same format as MASK]
// * If the input has more than 1 channel, only the first channel (R) is used
// as the mask.
//
// Output:
// MASK_SMOOTHED - Blended mask.
// [Same format as MASK]
// * The resulting filtered mask will be stored in the R channel,
// and duplicated in A if there are 4 channels.
//
// Options:
// combine_with_previous_ratio - Amount of previous to blend with current.
//
// Example:
// node {
// calculator: "SegmentationSmoothingCalculator"
// input_stream: "MASK:mask"
// input_stream: "MASK_PREVIOUS:mask_previous"
// output_stream: "MASK_SMOOTHED:mask_smoothed"
// options: {
// [mediapipe.SegmentationSmoothingCalculatorOptions.ext] {
// combine_with_previous_ratio: 0.9
// }
// }
// }
//
class SegmentationSmoothingCalculator : public CalculatorBase {
public:
SegmentationSmoothingCalculator() = default;
static absl::Status GetContract(CalculatorContract* cc);
// From Calculator.
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
absl::Status Close(CalculatorContext* cc) override;
private:
absl::Status RenderGpu(CalculatorContext* cc);
absl::Status RenderCpu(CalculatorContext* cc);
absl::Status GlSetup(CalculatorContext* cc);
void GlRender(CalculatorContext* cc);
float combine_with_previous_ratio_;
bool gpu_initialized_ = false;
#if !MEDIAPIPE_DISABLE_GPU
mediapipe::GlCalculatorHelper gpu_helper_;
GLuint program_ = 0;
#endif // !MEDIAPIPE_DISABLE_GPU
};
REGISTER_CALCULATOR(SegmentationSmoothingCalculator);
absl::Status SegmentationSmoothingCalculator::GetContract(
CalculatorContract* cc) {
CHECK_GE(cc->Inputs().NumEntries(), 1);
cc->Inputs().Tag(kCurrentMaskTag).Set<Image>();
cc->Inputs().Tag(kPreviousMaskTag).Set<Image>();
cc->Outputs().Tag(kOutputMaskTag).Set<Image>();
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status SegmentationSmoothingCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
auto options =
cc->Options<mediapipe::SegmentationSmoothingCalculatorOptions>();
combine_with_previous_ratio_ = options.combine_with_previous_ratio();
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc));
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status SegmentationSmoothingCalculator::Process(CalculatorContext* cc) {
if (cc->Inputs().Tag(kCurrentMaskTag).IsEmpty()) {
return absl::OkStatus();
}
if (cc->Inputs().Tag(kPreviousMaskTag).IsEmpty()) {
// Pass through current image if previous is not available.
cc->Outputs()
.Tag(kOutputMaskTag)
.AddPacket(cc->Inputs().Tag(kCurrentMaskTag).Value());
return absl::OkStatus();
}
// Run on GPU if incoming data is on GPU.
const bool use_gpu = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>().UsesGpu();
if (use_gpu) {
#if !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
if (!gpu_initialized_) {
MP_RETURN_IF_ERROR(GlSetup(cc));
gpu_initialized_ = true;
}
MP_RETURN_IF_ERROR(RenderGpu(cc));
return absl::OkStatus();
}));
#else
return absl::InternalError("GPU processing is disabled.");
#endif // !MEDIAPIPE_DISABLE_GPU
} else {
MP_RETURN_IF_ERROR(RenderCpu(cc));
}
return absl::OkStatus();
}
absl::Status SegmentationSmoothingCalculator::Close(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
gpu_helper_.RunInGlContext([this] {
if (program_) glDeleteProgram(program_);
program_ = 0;
});
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status SegmentationSmoothingCalculator::RenderCpu(CalculatorContext* cc) {
// Setup source images.
const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
const cv::Mat current_mat = mediapipe::formats::MatView(&current_frame);
RET_CHECK_EQ(current_mat.type(), CV_32FC1)
<< "Only 1-channel float input image is supported.";
const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
const cv::Mat previous_mat = mediapipe::formats::MatView(&previous_frame);
RET_CHECK_EQ(previous_mat.type(), current_mat.type())
<< "Warning: mixing input format types: " << previous_mat.type()
<< " != " << previous_mat.type();
RET_CHECK_EQ(current_mat.rows, previous_mat.rows);
RET_CHECK_EQ(current_mat.cols, previous_mat.cols);
// Setup destination image.
auto output_frame = std::make_shared<ImageFrame>(
current_frame.image_format(), current_mat.cols, current_mat.rows);
cv::Mat output_mat = mediapipe::formats::MatView(output_frame.get());
output_mat.setTo(cv::Scalar(0));
// Blending function.
const auto blending_fn = [&](const float prev_mask_value,
const float new_mask_value) {
/*
* Assume p := new_mask_value
* H(p) := 1 + (p * log(p) + (1-p) * log(1-p)) / log(2)
* uncertainty alpha(p) =
* Clamp(1 - (1 - H(p)) * (1 - H(p)), 0, 1) [squaring the uncertainty]
*
* The following polynomial approximates uncertainty alpha as a function
* of (p - 0.5):
*/
const float c1 = 5.68842;
const float c2 = -0.748699;
const float c3 = -57.8051;
const float c4 = 291.309;
const float c5 = -624.717;
const float t = new_mask_value - 0.5f;
const float x = t * t;
const float uncertainty =
1.0f -
std::min(1.0f, x * (c1 + x * (c2 + x * (c3 + x * (c4 + x * c5)))));
return new_mask_value + (prev_mask_value - new_mask_value) *
(uncertainty * combine_with_previous_ratio_);
};
// Write directly to the first channel of output.
for (int i = 0; i < output_mat.rows; ++i) {
float* out_ptr = output_mat.ptr<float>(i);
const float* curr_ptr = current_mat.ptr<float>(i);
const float* prev_ptr = previous_mat.ptr<float>(i);
for (int j = 0; j < output_mat.cols; ++j) {
const float new_mask_value = curr_ptr[j];
const float prev_mask_value = prev_ptr[j];
out_ptr[j] = blending_fn(prev_mask_value, new_mask_value);
}
}
cc->Outputs()
.Tag(kOutputMaskTag)
.AddPacket(MakePacket<Image>(output_frame).At(cc->InputTimestamp()));
return absl::OkStatus();
}
absl::Status SegmentationSmoothingCalculator::RenderGpu(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
// Setup source textures.
const auto& current_frame = cc->Inputs().Tag(kCurrentMaskTag).Get<Image>();
RET_CHECK(
(current_frame.format() == mediapipe::GpuBufferFormat::kBGRA32 ||
current_frame.format() == mediapipe::GpuBufferFormat::kGrayHalf16 ||
current_frame.format() == mediapipe::GpuBufferFormat::kGrayFloat32 ||
current_frame.format() == mediapipe::GpuBufferFormat::kRGB24))
<< "Only RGBA, RGB, or 1-channel Float input image supported.";
auto current_texture = gpu_helper_.CreateSourceTexture(current_frame);
const auto& previous_frame = cc->Inputs().Tag(kPreviousMaskTag).Get<Image>();
if (previous_frame.format() != current_frame.format()) {
LOG(ERROR) << "Warning: mixing input format types. ";
}
auto previous_texture = gpu_helper_.CreateSourceTexture(previous_frame);
// Setup destination texture.
const int width = current_frame.width(), height = current_frame.height();
auto output_texture = gpu_helper_.CreateDestinationTexture(
width, height, current_frame.format());
// Process shader.
{
gpu_helper_.BindFramebuffer(output_texture);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, current_texture.name());
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, previous_texture.name());
GlRender(cc);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
}
glFlush();
// Send out image as GPU packet.
auto output_frame = output_texture.GetFrame<Image>();
cc->Outputs()
.Tag(kOutputMaskTag)
.Add(output_frame.release(), cc->InputTimestamp());
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
void SegmentationSmoothingCalculator::GlRender(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
static const GLfloat square_vertices[] = {
-1.0f, -1.0f, // bottom left
1.0f, -1.0f, // bottom right
-1.0f, 1.0f, // top left
1.0f, 1.0f, // top right
};
static const GLfloat texture_vertices[] = {
0.0f, 0.0f, // bottom left
1.0f, 0.0f, // bottom right
0.0f, 1.0f, // top left
1.0f, 1.0f, // top right
};
// program
glUseProgram(program_);
// vertex storage
GLuint vbo[2];
glGenBuffers(2, vbo);
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
// vbo 0
glBindBuffer(GL_ARRAY_BUFFER, vbo[0]);
glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat), square_vertices,
GL_STATIC_DRAW);
glEnableVertexAttribArray(ATTRIB_VERTEX);
glVertexAttribPointer(ATTRIB_VERTEX, 2, GL_FLOAT, 0, 0, nullptr);
// vbo 1
glBindBuffer(GL_ARRAY_BUFFER, vbo[1]);
glBufferData(GL_ARRAY_BUFFER, 4 * 2 * sizeof(GLfloat), texture_vertices,
GL_STATIC_DRAW);
glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0, nullptr);
// draw
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// cleanup
glDisableVertexAttribArray(ATTRIB_VERTEX);
glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
glDeleteBuffers(2, vbo);
#endif // !MEDIAPIPE_DISABLE_GPU
}
absl::Status SegmentationSmoothingCalculator::GlSetup(CalculatorContext* cc) {
#if !MEDIAPIPE_DISABLE_GPU
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
};
const GLchar* attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
};
// Shader to blend in previous mask based on computed uncertainty probability.
const std::string frag_src =
absl::StrCat(std::string(mediapipe::kMediaPipeFragmentShaderPreamble),
R"(
DEFAULT_PRECISION(mediump, float)
#ifdef GL_ES
#define fragColor gl_FragColor
#else
out vec4 fragColor;
#endif // defined(GL_ES);
in vec2 sample_coordinate;
uniform sampler2D current_mask;
uniform sampler2D previous_mask;
uniform float combine_with_previous_ratio;
void main() {
vec4 current_pix = texture2D(current_mask, sample_coordinate);
vec4 previous_pix = texture2D(previous_mask, sample_coordinate);
float new_mask_value = current_pix.r;
float prev_mask_value = previous_pix.r;
// Assume p := new_mask_value
// H(p) := 1 + (p * log(p) + (1-p) * log(1-p)) / log(2)
// uncertainty alpha(p) =
// Clamp(1 - (1 - H(p)) * (1 - H(p)), 0, 1) [squaring the uncertainty]
//
// The following polynomial approximates uncertainty alpha as a function
// of (p - 0.5):
const float c1 = 5.68842;
const float c2 = -0.748699;
const float c3 = -57.8051;
const float c4 = 291.309;
const float c5 = -624.717;
float t = new_mask_value - 0.5;
float x = t * t;
float uncertainty =
1.0 - min(1.0, x * (c1 + x * (c2 + x * (c3 + x * (c4 + x * c5)))));
new_mask_value +=
(prev_mask_value - new_mask_value) * (uncertainty * combine_with_previous_ratio);
fragColor = vec4(new_mask_value, 0.0, 0.0, new_mask_value);
}
)");
// Create shader program and set parameters.
mediapipe::GlhCreateProgram(mediapipe::kBasicVertexShader, frag_src.c_str(),
NUM_ATTRIBUTES, (const GLchar**)&attr_name[0],
attr_location, &program_);
RET_CHECK(program_) << "Problem initializing the program.";
glUseProgram(program_);
glUniform1i(glGetUniformLocation(program_, "current_mask"), 1);
glUniform1i(glGetUniformLocation(program_, "previous_mask"), 2);
glUniform1f(glGetUniformLocation(program_, "combine_with_previous_ratio"),
combine_with_previous_ratio_);
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
} // namespace mediapipe
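For readers skimming the diff, the CPU blending rule above can be summarized by
this rough NumPy sketch (the function name is ours; the constants match the
calculator's polynomial approximation of the uncertainty):
```python
import numpy as np

def smooth_mask(new_mask, prev_mask, combine_with_previous_ratio):
  """Blend the previous mask into the new one where the new mask is uncertain."""
  # Polynomial approximation of the squared-entropy uncertainty in (p - 0.5)^2.
  c1, c2, c3, c4, c5 = 5.68842, -0.748699, -57.8051, 291.309, -624.717
  x = (new_mask - 0.5) ** 2
  uncertainty = 1.0 - np.minimum(
      1.0, x * (c1 + x * (c2 + x * (c3 + x * (c4 + x * c5)))))
  return new_mask + (prev_mask - new_mask) * (
      uncertainty * combine_with_previous_ratio)
```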

View File

@ -0,0 +1,35 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message SegmentationSmoothingCalculatorOptions {
extend CalculatorOptions {
optional SegmentationSmoothingCalculatorOptions ext = 377425128;
}
// How much to blend in previous mask, based on a probability estimate.
// Range: [0-1]
// 0 = Use only current frame (no blending).
// 1 = Blend in the previous mask based on uncertainty estimate.
// With ratio at 1, the uncertainty estimate is trusted completely.
// When uncertainty is high, the previous mask is given higher weight.
// Therefore, if both ratio and uncertainty are 1, only the old mask is used.
// A pixel is 'uncertain' if its value is close to the middle (0.5 or 127).
optional float combine_with_previous_ratio = 1 [default = 0.0];
}

View File

@ -0,0 +1,206 @@
// Copyright 2018 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "mediapipe/calculators/image/segmentation_smoothing_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_opencv.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
namespace mediapipe {
namespace {
// 4x4 VEC32F1 mask; the center 2x2 block is set to 0.98 (~250 in 8-bit).
const float mask_data[] = {
0.00, 0.00, 0.00, 0.00, //
0.00, 0.98, 0.98, 0.00, //
0.00, 0.98, 0.98, 0.00, //
0.00, 0.00, 0.00, 0.00, //
};
void RunGraph(Packet curr_packet, Packet prev_packet, bool use_gpu, float ratio,
cv::Mat* result) {
CalculatorGraphConfig graph_config;
if (use_gpu) {
graph_config = ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
R"pb(
input_stream: "curr_mask"
input_stream: "prev_mask"
output_stream: "new_mask"
node {
calculator: "ImageCloneCalculator"
input_stream: "curr_mask"
output_stream: "curr_mask_gpu"
options: {
[mediapipe.ImageCloneCalculatorOptions.ext] {
output_on_gpu: true
}
}
}
node {
calculator: "ImageCloneCalculator"
input_stream: "prev_mask"
output_stream: "prev_mask_gpu"
options: {
[mediapipe.ImageCloneCalculatorOptions.ext] {
output_on_gpu: true
}
}
}
node {
calculator: "SegmentationSmoothingCalculator"
input_stream: "MASK:curr_mask_gpu"
input_stream: "MASK_PREVIOUS:prev_mask_gpu"
output_stream: "MASK_SMOOTHED:new_mask"
node_options {
[type.googleapis.com/
mediapipe.SegmentationSmoothingCalculatorOptions]: {
combine_with_previous_ratio: $0
}
}
}
)pb",
ratio));
} else {
graph_config = ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
R"pb(
input_stream: "curr_mask"
input_stream: "prev_mask"
output_stream: "new_mask"
node {
calculator: "SegmentationSmoothingCalculator"
input_stream: "MASK:curr_mask"
input_stream: "MASK_PREVIOUS:prev_mask"
output_stream: "MASK_SMOOTHED:new_mask"
node_options {
[type.googleapis.com/
mediapipe.SegmentationSmoothingCalculatorOptions]: {
combine_with_previous_ratio: $0
}
}
}
)pb",
ratio));
}
std::vector<Packet> output_packets;
tool::AddVectorSink("new_mask", &graph_config, &output_packets);
CalculatorGraph graph(graph_config);
MP_ASSERT_OK(graph.StartRun({}));
MP_ASSERT_OK(
graph.AddPacketToInputStream("curr_mask", curr_packet.At(Timestamp(0))));
MP_ASSERT_OK(
graph.AddPacketToInputStream("prev_mask", prev_packet.At(Timestamp(0))));
MP_ASSERT_OK(graph.WaitUntilIdle());
ASSERT_EQ(1, output_packets.size());
Image result_image = output_packets[0].Get<Image>();
cv::Mat result_mat = formats::MatView(&result_image);
result_mat.copyTo(*result);
// Fully close graph at end, otherwise calculator+Images are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("curr_mask"));
MP_ASSERT_OK(graph.CloseInputStream("prev_mask"));
MP_ASSERT_OK(graph.WaitUntilDone());
}
void RunTest(bool use_gpu, float mix_ratio, cv::Mat& test_result) {
cv::Mat mask_mat(cv::Size(4, 4), CV_32FC1, const_cast<float*>(mask_data));
cv::Mat curr_mat = mask_mat;
// 3x3 blur of 250 block produces all pixels '111'.
cv::Mat prev_mat;
cv::blur(mask_mat, prev_mat, cv::Size(3, 3));
Packet curr_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
ImageFormat::VEC32F1, curr_mat.size().width, curr_mat.size().height));
curr_mat.copyTo(formats::MatView(&(curr_packet.Get<Image>())));
Packet prev_packet = MakePacket<Image>(std::make_unique<ImageFrame>(
ImageFormat::VEC32F1, prev_mat.size().width, prev_mat.size().height));
prev_mat.copyTo(formats::MatView(&(prev_packet.Get<Image>())));
cv::Mat result;
RunGraph(curr_packet, prev_packet, use_gpu, mix_ratio, &result);
ASSERT_EQ(curr_mat.rows, result.rows);
ASSERT_EQ(curr_mat.cols, result.cols);
ASSERT_EQ(curr_mat.type(), result.type());
result.copyTo(test_result);
if (mix_ratio == 1.0) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
float in = curr_mat.at<float>(i, j);
float out = result.at<float>(i, j);
// Since the input has a high value (~250 in 8-bit), it has low uncertainty,
// so the output should have shifted slightly toward the previous mask,
// but not by much.
if (in > 0) EXPECT_NE(in, out);
EXPECT_NEAR(in, out, 3.0 / 255.0);
}
}
} else if (mix_ratio == 0.0) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
float in = curr_mat.at<float>(i, j);
float out = result.at<float>(i, j);
EXPECT_EQ(in, out); // Output should match current.
}
}
} else {
LOG(ERROR) << "invalid ratio";
}
}
TEST(SegmentationSmoothingCalculatorTest, TestSmoothing) {
bool use_gpu;
float mix_ratio;
use_gpu = false;
mix_ratio = 0.0;
cv::Mat cpu_0;
RunTest(use_gpu, mix_ratio, cpu_0);
use_gpu = false;
mix_ratio = 1.0;
cv::Mat cpu_1;
RunTest(use_gpu, mix_ratio, cpu_1);
use_gpu = true;
mix_ratio = 1.0;
cv::Mat gpu_1;
RunTest(use_gpu, mix_ratio, gpu_1);
// CPU & GPU should match.
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
float gpu = gpu_1.at<float>(i, j);
float cpu = cpu_1.at<float>(i, j);
EXPECT_EQ(cpu, gpu);
}
}
}
} // namespace
} // namespace mediapipe

View File

@ -109,6 +109,8 @@ cc_library(
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/objc:mediapipe_framework_ios",
"//mediapipe/util/tflite:config",
"@com_google_absl//absl/memory",
"@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate",
"@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate_internal",
"@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape",
@ -478,7 +480,6 @@ cc_library(
deps = [
":image_to_tensor_calculator_cc_proto",
":image_to_tensor_converter",
":image_to_tensor_converter_opencv",
":image_to_tensor_utils",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/formats:image",
@ -494,6 +495,9 @@ cc_library(
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [":image_to_tensor_calculator_gpu_deps"],
}) + select({
"//mediapipe/framework/port:disable_opencv": [],
"//conditions:default": [":image_to_tensor_converter_opencv"],
}),
alwayslink = 1,
)

View File

@ -18,7 +18,6 @@
#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/calculator_framework.h"
@ -33,6 +32,10 @@
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gpu_origin.pb.h"
#if !MEDIAPIPE_DISABLE_OPENCV
#include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
#endif
#if !MEDIAPIPE_DISABLE_GPU
#include "mediapipe/gpu/gpu_buffer.h"
@ -301,8 +304,13 @@ class ImageToTensorCalculator : public Node {
}
} else {
if (!cpu_converter_) {
#if !MEDIAPIPE_DISABLE_OPENCV
ASSIGN_OR_RETURN(cpu_converter_,
CreateOpenCvConverter(cc, GetBorderMode()));
#else
LOG(FATAL) << "Cannot create image to tensor opencv converter since "
"MEDIAPIPE_DISABLE_OPENCV is defined.";
#endif // !MEDIAPIPE_DISABLE_OPENCV
}
}
return absl::OkStatus();

View File

@ -312,7 +312,7 @@ class GlProcessor : public ImageToTensorConverter {
return absl::OkStatus();
}));
return std::move(tensor);
return tensor;
}
~GlProcessor() override {
@ -338,8 +338,7 @@ CreateImageToGlBufferTensorConverter(CalculatorContext* cc,
auto result = absl::make_unique<GlProcessor>();
MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom, border_mode));
// Simply "return std::move(result)" failed to build on macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(std::move(result));
return result;
}
} // namespace mediapipe

View File

@ -334,9 +334,7 @@ CreateImageToGlTextureTensorConverter(CalculatorContext* cc,
BorderMode border_mode) {
auto result = absl::make_unique<GlProcessor>();
MP_RETURN_IF_ERROR(result->Init(cc, input_starts_at_bottom, border_mode));
// Simply "return std::move(result)" failed to build on macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(std::move(result));
return result;
}
} // namespace mediapipe

View File

@ -383,7 +383,7 @@ class MetalProcessor : public ImageToTensorConverter {
tflite::gpu::HW(output_dims.height, output_dims.width),
command_buffer, buffer_view.buffer()));
[command_buffer commit];
return std::move(tensor);
return tensor;
}
}
@ -399,8 +399,7 @@ absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateMetalConverter(
auto result = absl::make_unique<MetalProcessor>();
MP_RETURN_IF_ERROR(result->Init(cc, border_mode));
// Simply "return std::move(result)" failed to build on macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(std::move(result));
return result;
}
} // namespace mediapipe

View File

@ -103,7 +103,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
GetValueRangeTransformation(kInputImageRangeMin, kInputImageRangeMax,
range_min, range_max));
transformed.convertTo(dst, CV_32FC3, transform.scale, transform.offset);
return std::move(tensor);
return tensor;
}
private:
@ -114,10 +114,7 @@ class OpenCvProcessor : public ImageToTensorConverter {
absl::StatusOr<std::unique_ptr<ImageToTensorConverter>> CreateOpenCvConverter(
CalculatorContext* cc, BorderMode border_mode) {
// Simply "return absl::make_unique<OpenCvProcessor>()" failed to build on
// macOS with bazel.
return std::unique_ptr<ImageToTensorConverter>(
absl::make_unique<OpenCvProcessor>(border_mode));
return absl::make_unique<OpenCvProcessor>(border_mode);
}
} // namespace mediapipe

View File

@ -4,7 +4,7 @@ output_stream: "detections"
# Subgraph that detects faces.
node {
calculator: "FaceDetectionFrontCpu"
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:image"
output_stream: "DETECTIONS:detections"
}

View File

@ -490,7 +490,7 @@ class TensorFlowInferenceCalculator : public CalculatorBase {
<< keyed_tensors.first;
}
} else {
// Pad by replicating the first tens or, then ignore the values.
// Pad by replicating the first tensor, then ignore the values.
keyed_tensors.second.resize(options_.batch_size());
std::fill(keyed_tensors.second.begin() +
inference_state->batch_timestamps_.size(),

View File

@ -840,6 +840,20 @@ cc_test(
],
)
cc_library(
name = "world_landmark_projection_calculator",
srcs = ["world_landmark_projection_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
mediapipe_proto_library(
name = "landmarks_smoothing_calculator_proto",
srcs = ["landmarks_smoothing_calculator.proto"],
@ -894,6 +908,31 @@ cc_library(
alwayslink = 1,
)
mediapipe_proto_library(
name = "visibility_copy_calculator_proto",
srcs = ["visibility_copy_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "visibility_copy_calculator",
srcs = ["visibility_copy_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":visibility_copy_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/algorithm:container",
],
alwayslink = 1,
)
cc_library(
name = "landmarks_to_floats_calculator",
srcs = ["landmarks_to_floats_calculator.cc"],

View File

@ -272,6 +272,15 @@ absl::Status AnnotationOverlayCalculator::Open(CalculatorContext* cc) {
}
absl::Status AnnotationOverlayCalculator::Process(CalculatorContext* cc) {
if (cc->Inputs().HasTag(kGpuBufferTag) &&
cc->Inputs().Tag(kGpuBufferTag).IsEmpty()) {
return absl::OkStatus();
}
if (cc->Inputs().HasTag(kImageFrameTag) &&
cc->Inputs().Tag(kImageFrameTag).IsEmpty()) {
return absl::OkStatus();
}
// Initialize render target, drawn with OpenCV.
std::unique_ptr<cv::Mat> image_mat;
ImageFormat::Format target_format;

View File

@ -203,6 +203,9 @@ absl::Status DetectionsToRectsCalculator::Process(CalculatorContext* cc) {
cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
return absl::OkStatus();
}
if (rotate_ && !HasTagValue(cc, kImageSizeTag)) {
return absl::OkStatus();
}
std::vector<Detection> detections;
if (cc->Inputs().HasTag(kDetectionTag)) {

View File

@ -130,8 +130,8 @@ absl::Status RectTransformationCalculator::Process(CalculatorContext* cc) {
}
cc->Outputs().Index(0).Add(output_rects.release(), cc->InputTimestamp());
}
if (cc->Inputs().HasTag(kNormRectTag) &&
!cc->Inputs().Tag(kNormRectTag).IsEmpty()) {
if (HasTagValue(cc->Inputs(), kNormRectTag) &&
HasTagValue(cc->Inputs(), kImageSizeTag)) {
auto rect = cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>();
const auto& image_size =
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
@ -139,8 +139,8 @@ absl::Status RectTransformationCalculator::Process(CalculatorContext* cc) {
cc->Outputs().Index(0).AddPacket(
MakePacket<NormalizedRect>(rect).At(cc->InputTimestamp()));
}
if (cc->Inputs().HasTag(kNormRectsTag) &&
!cc->Inputs().Tag(kNormRectsTag).IsEmpty()) {
if (HasTagValue(cc->Inputs(), kNormRectsTag) &&
HasTagValue(cc->Inputs(), kImageSizeTag)) {
auto rects =
cc->Inputs().Tag(kNormRectsTag).Get<std::vector<NormalizedRect>>();
const auto& image_size =

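Both this hunk and the DetectionsToRectsCalculator hunk above switch to a HasTagValue helper whose definition is not shown in this diff. A plausible sketch of the semantics it bundles (tag present and packet non-empty) is:

#include <string>

// Hypothetical sketch only: the actual helper used by these calculators may
// differ in name, location, and overloads (e.g. a variant taking a
// CalculatorContext*, as used in the DetectionsToRectsCalculator hunk above).
template <typename InputCollection>
bool HasTagValue(const InputCollection& inputs, const std::string& tag) {
  return inputs.HasTag(tag) && !inputs.Tag(tag).IsEmpty();
}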
View File

@ -549,7 +549,7 @@ absl::Status MotionAnalysisCalculator::Process(CalculatorContext* cc) {
timestamp_buffer_.push_back(timestamp);
++frame_idx_;
VLOG_EVERY_N(0, 100) << "Analyzed frame " << frame_idx_;
VLOG_EVERY_N(1, 100) << "Analyzed frame " << frame_idx_;
// Buffer input frames only if visualization is requested.
if (visualize_output_ || video_output_) {

View File

@ -37,7 +37,7 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/face_detection:face_detection_mobile_cpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -0,0 +1,60 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/face_detection:face_detection_full_range_mobile_gpu_deps",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
android_binary(
name = "facedetectionfullrangegpu",
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/face_detection:face_detection_full_range_mobile_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_full_range_sparse.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.apps.facedetectionfullrangegpu",
"appName": "Face Detection Full-range (GPU)",
"mainActivity": "com.google.mediapipe.apps.basic.MainActivity",
"cameraFacingFront": "False",
"binaryGraphName": "face_detection_full_range_mobile_gpu.binarypb",
"inputVideoStreamName": "input_video",
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",
"converterNumBuffers": "2",
},
multidex = "native",
deps = [
":mediapipe_jni_lib",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib",
],
)

View File

@ -37,7 +37,7 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/face_detection:face_detection_mobile_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -42,7 +42,7 @@ android_binary(
"//mediapipe/graphs/face_effect/data:glasses.binarypb",
"//mediapipe/graphs/face_effect/data:glasses.pngblob",
"//mediapipe/graphs/face_effect:face_effect_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_detection.binarypb",
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite",

View File

@ -38,7 +38,7 @@ android_binary(
assets = [
"//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -37,7 +37,7 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/holistic_tracking:holistic_tracking_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt",

View File

@ -39,7 +39,7 @@ android_binary(
"//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/iris_landmark:iris_landmark.tflite",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -38,7 +38,7 @@ node {
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.TfLiteInferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_back.tflite"
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
}
}
}

View File

@ -16,6 +16,14 @@ licenses(["notice"])
package(default_visibility = ["//mediapipe/examples:__subpackages__"])
cc_binary(
name = "face_detection_full_range_cpu",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main",
"//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_deps",
],
)
cc_binary(
name = "face_detection_cpu",
deps = [

View File

@ -55,7 +55,7 @@ objc_library(
name = "FaceDetectionCpuAppLibrary",
data = [
"//mediapipe/graphs/face_detection:face_detection_mobile_cpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
deps = [
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@ -55,7 +55,7 @@ objc_library(
name = "FaceDetectionGpuAppLibrary",
data = [
"//mediapipe/graphs/face_detection:face_detection_mobile_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
],
deps = [
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@ -66,7 +66,7 @@ objc_library(
"//mediapipe/graphs/face_effect/data:facepaint.pngblob",
"//mediapipe/graphs/face_effect/data:glasses.binarypb",
"//mediapipe/graphs/face_effect/data:glasses.pngblob",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata.binarypb",
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_detection.binarypb",
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata_landmarks.binarypb",
@ -109,7 +109,7 @@ objc_library(
"//mediapipe/graphs/face_effect/data:facepaint.pngblob",
"//mediapipe/graphs/face_effect/data:glasses.binarypb",
"//mediapipe/graphs/face_effect/data:glasses.pngblob",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_geometry/data:geometry_pipeline_metadata.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
],

View File

@ -62,7 +62,7 @@ objc_library(
copts = ["-std=c++17"],
data = [
"//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
],
deps = [

View File

@ -55,7 +55,7 @@ objc_library(
name = "HolisticTrackingGpuAppLibrary",
data = [
"//mediapipe/graphs/holistic_tracking:holistic_tracking_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt",

View File

@ -62,7 +62,7 @@ objc_library(
copts = ["-std=c++17"],
data = [
"//mediapipe/graphs/iris_tracking:iris_tracking_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_front.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/iris_landmark:iris_landmark.tflite",
],

View File

@ -953,6 +953,9 @@ cc_library(
}) + select({
"//conditions:default": [],
"//mediapipe/gpu:disable_gpu": ["MEDIAPIPE_DISABLE_GPU=1"],
}) + select({
"//conditions:default": [],
"//mediapipe/framework/port:disable_opencv": ["MEDIAPIPE_DISABLE_OPENCV=1"],
}) + select({
"//conditions:default": [],
"//mediapipe/framework:disable_rtti_and_exceptions": [

View File

@ -17,6 +17,14 @@ namespace mediapipe {
namespace api2 {
namespace builder {
// Workaround for static_assert(false). Example:
// dependent_false<T>::value returns false.
// For more information, see:
// https://en.cppreference.com/w/cpp/language/if#Constexpr_If
// TODO: migrate to a common utility when available.
template <class T>
struct dependent_false : std::false_type {};
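A minimal standalone illustration of the pattern (the function below is hypothetical and redeclares the helper for self-containment): a plain static_assert(false) in the final else branch would be ill-formed for every T, so the assertion is made dependent on the template parameter and only fires when that branch is actually instantiated.

#include <type_traits>

template <class T>
struct dependent_false : std::false_type {};

template <class T>
void RequireIntegral(T) {
  if constexpr (std::is_integral_v<T>) {
    // Accepted: integral types.
  } else {
    // Only triggers for instantiations that reach this branch.
    static_assert(dependent_false<T>::value, "T must be integral.");
  }
}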
template <typename T>
T& GetWithAutoGrow(std::vector<std::unique_ptr<T>>* vecp, int index) {
auto& vec = *vecp;
@ -209,6 +217,21 @@ class NodeBase {
return SideDestination<true>(&in_sides_[tag]);
}
template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
if constexpr (std::is_same_v<B, OutputBase>) {
return Source<kIsMultiple, T>(&out_streams_[port.Tag()]);
} else if constexpr (std::is_same_v<B, InputBase>) {
return Destination<kIsMultiple, T>(&in_streams_[port.Tag()]);
} else if constexpr (std::is_same_v<B, SideOutputBase>) {
return SideSource<kIsMultiple, T>(&out_sides_[port.Tag()]);
} else if constexpr (std::is_same_v<B, SideInputBase>) {
return SideDestination<kIsMultiple, T>(&in_sides_[port.Tag()]);
} else {
static_assert(dependent_false<B>::value, "Type not supported.");
}
}
// Convenience methods for accessing purely index-based ports.
Source<false> Out(int index) { return Out("")[index]; }
@ -429,6 +452,24 @@ class Graph {
return Dst(&graph_boundary_.in_sides_[graph_output.Tag()]);
}
template <typename B, typename T, bool kIsOptional, bool kIsMultiple>
auto operator[](const PortCommon<B, T, kIsOptional, kIsMultiple>& port) {
if constexpr (std::is_same_v<B, OutputBase>) {
return Destination<kIsMultiple, T>(
&graph_boundary_.in_streams_[port.Tag()]);
} else if constexpr (std::is_same_v<B, InputBase>) {
return Source<kIsMultiple, T>(&graph_boundary_.out_streams_[port.Tag()]);
} else if constexpr (std::is_same_v<B, SideOutputBase>) {
return SideDestination<kIsMultiple, T>(
&graph_boundary_.in_sides_[port.Tag()]);
} else if constexpr (std::is_same_v<B, SideInputBase>) {
return SideSource<kIsMultiple, T>(
&graph_boundary_.out_sides_[port.Tag()]);
} else {
static_assert(dependent_false<B>::value, "Type not supported.");
}
}
// Returns the graph config. This can be used to instantiate and run the
// graph.
CalculatorGraphConfig GetConfig() {

View File

@ -138,6 +138,35 @@ TEST(BuilderTest, TypedMultiple) {
EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
}
TEST(BuilderTest, TypedByPorts) {
builder::Graph graph;
auto& foo = graph.AddNode<Foo>();
auto& adder = graph.AddNode<FloatAdder>();
graph[FooBar1::kIn].SetName("base") >> foo[Foo::kBase];
foo[Foo::kOut] >> adder[FloatAdder::kIn][0];
foo[Foo::kOut] >> adder[FloatAdder::kIn][1];
adder[FloatAdder::kOut].SetName("out") >> graph[FooBar1::kOut];
CalculatorGraphConfig expected =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
input_stream: "IN:base"
output_stream: "OUT:out"
node {
calculator: "Foo"
input_stream: "BASE:base"
output_stream: "OUT:__stream_0"
}
node {
calculator: "FloatAdder"
input_stream: "IN:0:__stream_0"
input_stream: "IN:1:__stream_0"
output_stream: "OUT:out"
}
)pb");
EXPECT_THAT(graph.GetConfig(), EqualsProto(expected));
}
TEST(BuilderTest, PacketGenerator) {
builder::Graph graph;
auto& generator = graph.AddPacketGenerator("FloatGenerator");

View File

@ -186,6 +186,7 @@ cc_library(
"//conditions:default": [
"//mediapipe/framework/port:opencv_imgproc",
],
"//mediapipe/framework/port:disable_opencv": [],
}) + select({
"//conditions:default": [
],

View File

@ -76,10 +76,7 @@ bool Image::ConvertToGpu() const {
gpu_buffer_ = mediapipe::GpuBuffer(std::move(buffer));
#else
// GlCalculatorHelperImpl::MakeGlTextureBuffer (CreateSourceTexture)
auto buffer = mediapipe::GlTextureBuffer::Create(
image_frame_->Width(), image_frame_->Height(),
mediapipe::GpuBufferFormatForImageFormat(image_frame_->Format()),
image_frame_->PixelData());
auto buffer = mediapipe::GlTextureBuffer::Create(*image_frame_);
glBindTexture(GL_TEXTURE_2D, buffer->name());
// See GlCalculatorHelperImpl::SetStandardTextureParams
glTexParameteri(buffer->target(), GL_TEXTURE_MIN_FILTER, GL_LINEAR);

View File

@ -32,7 +32,12 @@
// clang-format off
#if !defined(LOCATION_OPENCV)
# if !MEDIAPIPE_DISABLE_OPENCV && \
(!defined(MEDIAPIPE_MOBILE) || defined(MEDIAPIPE_ANDROID_OPENCV))
# define LOCATION_OPENCV 1
# else
# define LOCATION_OPENCV 0
# endif
#endif
#if LOCATION_OPENCV

View File

@ -158,12 +158,12 @@ cc_library(
hdrs = [
"gmock.h",
"gtest.h",
"gtest-spi.h",
"status_matchers.h",
],
visibility = ["//visibility:public"],
deps = [
":status_matchers",
"//mediapipe/framework:port",
"@com_google_googletest//:gtest",
],
)
@ -174,12 +174,12 @@ cc_library(
hdrs = [
"gmock.h",
"gtest.h",
"gtest-spi.h",
"status_matchers.h",
],
visibility = ["//visibility:public"],
deps = [
":status_matchers",
"//mediapipe/framework:port",
"//mediapipe/framework/deps:status_matchers",
"@com_google_googletest//:gtest_main",
],
@ -217,6 +217,16 @@ cc_library(
deps = ["//mediapipe/framework/deps:numbers"],
)
# Disables OpenCV when MEDIAPIPE_DISABLE_OPENCV is defined to 1 on the bazel command line.
# Note that this only applies to a select few calculators/framework components currently.
config_setting(
name = "disable_opencv",
define_values = {
"MEDIAPIPE_DISABLE_OPENCV": "1",
},
visibility = ["//visibility:public"],
)
cc_library(
name = "opencv_core",
hdrs = ["opencv_core_inc.h"],

View File

@ -0,0 +1,20 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_PORT_GTEST_SPI_H_
#define MEDIAPIPE_PORT_GTEST_SPI_H_
#include "gtest/gtest-spi.h"
#endif // MEDIAPIPE_PORT_GTEST_SPI_H_
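For reference, gtest-spi.h is the googletest header that provides failure-capturing macros such as EXPECT_FATAL_FAILURE and EXPECT_NONFATAL_FAILURE, used when a test needs to verify that some code itself reports a googletest failure (for example when testing custom matchers). A small sketch, using the upstream headers directly rather than the new port wrapper:

#include "gtest/gtest-spi.h"
#include "gtest/gtest.h"

TEST(GtestSpiExample, CapturesAnExpectedFailure) {
  // The inner ADD_FAILURE() is intercepted and verified, so this test passes.
  EXPECT_NONFATAL_FAILURE(ADD_FAILURE() << "expected failure",
                          "expected failure");
}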

View File

@ -585,6 +585,7 @@ cc_library(
"//mediapipe:apple": [
":gl_calculator_helper_ios",
"//mediapipe/objc:util",
"//mediapipe/objc:CFHolder",
],
}),
)
@ -714,11 +715,12 @@ cc_library(
deps = [
":gl_calculator_helper",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
] + select({
"//conditions:default": [],
"//mediapipe:apple": ["//mediapipe/objc:util"],
}),
alwayslink = 1,
)

View File

@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/gpu/gl_calculator_helper_impl.h"
#include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/gpu_shared_data_internal.h"
@ -176,10 +179,8 @@ GlTexture GlCalculatorHelperImpl::MapGlTextureBuffer(
GlTextureBufferSharedPtr GlCalculatorHelperImpl::MakeGlTextureBuffer(
const ImageFrame& image_frame) {
CHECK(gl_context_->IsCurrent());
auto buffer = GlTextureBuffer::Create(
image_frame.Width(), image_frame.Height(),
GpuBufferFormatForImageFormat(image_frame.Format()),
image_frame.PixelData());
auto buffer = GlTextureBuffer::Create(image_frame);
if (buffer->format_ != GpuBufferFormat::kUnknown) {
glBindTexture(GL_TEXTURE_2D, buffer->name_);

View File

@ -32,15 +32,56 @@ std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Wrap(
std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Create(int width, int height,
GpuBufferFormat format,
const void* data) {
const void* data,
int alignment) {
auto buf = absl::make_unique<GlTextureBuffer>(GL_TEXTURE_2D, 0, width, height,
format, nullptr);
if (!buf->CreateInternal(data)) {
if (!buf->CreateInternal(data, alignment)) {
return nullptr;
}
return buf;
}
static inline int AlignedToPowerOf2(int value, int alignment) {
// alignment must be a power of 2
return ((value - 1) | (alignment - 1)) + 1;
}
std::unique_ptr<GlTextureBuffer> GlTextureBuffer::Create(
const ImageFrame& image_frame) {
int base_ws = image_frame.Width() * image_frame.NumberOfChannels() *
image_frame.ByteDepth();
int actual_ws = image_frame.WidthStep();
int alignment = 0;
std::unique_ptr<ImageFrame> temp;
const uint8* data = image_frame.PixelData();
// Let's see if the pixel data is tightly aligned to one of the alignments
// supported by OpenGL, preferring 4 if possible since it's the default.
if (actual_ws == AlignedToPowerOf2(base_ws, 4))
alignment = 4;
else if (actual_ws == AlignedToPowerOf2(base_ws, 1))
alignment = 1;
else if (actual_ws == AlignedToPowerOf2(base_ws, 2))
alignment = 2;
else if (actual_ws == AlignedToPowerOf2(base_ws, 8))
alignment = 8;
// If no GL-compatible alignment was found, we copy the data to a temporary
// buffer, aligned to 4. We do this using another ImageFrame purely for
// convenience.
if (!alignment) {
temp = std::make_unique<ImageFrame>();
temp->CopyFrom(image_frame, 4);
data = temp->PixelData();
alignment = 4;
}
return Create(image_frame.Width(), image_frame.Height(),
GpuBufferFormatForImageFormat(image_frame.Format()), data,
alignment);
}
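To make the alignment probe above concrete, a standalone worked example with hypothetical sizes: a 100-pixel-wide, 3-channel, 8-bit frame packs to 300 bytes per row, and if the ImageFrame was allocated with a 304-byte row stride, only the 8-byte alignment reproduces that stride, so alignment = 8 is chosen and no copy is needed.

#include <cassert>

static int AlignedToPowerOf2(int value, int alignment) {
  // Same bit trick as above: round value up to the next multiple of
  // alignment (alignment must be a power of 2).
  return ((value - 1) | (alignment - 1)) + 1;
}

int main() {
  const int base_ws = 100 * 3 * 1;  // width * channels * byte depth = 300
  const int actual_ws = 304;        // hypothetical allocated row stride
  assert(AlignedToPowerOf2(base_ws, 4) != actual_ws);  // 300, no match
  assert(AlignedToPowerOf2(base_ws, 1) != actual_ws);  // 300, no match
  assert(AlignedToPowerOf2(base_ws, 2) != actual_ws);  // 300, no match
  assert(AlignedToPowerOf2(base_ws, 8) == actual_ws);  // 304, alignment = 8
  return 0;
}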
GlTextureBuffer::GlTextureBuffer(GLenum target, GLuint name, int width,
int height, GpuBufferFormat format,
DeletionCallback deletion_callback,
@ -53,7 +94,7 @@ GlTextureBuffer::GlTextureBuffer(GLenum target, GLuint name, int width,
deletion_callback_(deletion_callback),
producer_context_(producer_context) {}
bool GlTextureBuffer::CreateInternal(const void* data) {
bool GlTextureBuffer::CreateInternal(const void* data, int alignment) {
auto context = GlContext::GetCurrent();
if (!context) return false;
@ -66,8 +107,11 @@ bool GlTextureBuffer::CreateInternal(const void* data) {
GlTextureInfo info =
GlTextureInfoForGpuBufferFormat(format_, 0, context->GetGlVersion());
if (alignment != 4 && data) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
// See b/70294573 for details about this.
if (info.gl_internal_format == GL_RGBA16F &&
context->GetGlVersion() != GlVersion::kGLES2 &&
SymbolAvailable(&glTexStorage2D)) {
CHECK(data == nullptr) << "unimplemented";
glTexStorage2D(target_, 1, info.gl_internal_format, width_, height_);
@ -76,6 +120,8 @@ bool GlTextureBuffer::CreateInternal(const void* data) {
height_, 0 /* border */, info.gl_format, info.gl_type, data);
}
if (alignment != 4 && data) glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glBindTexture(target_, 0);
// Use the deletion callback to delete the texture on the context

View File

@ -21,6 +21,7 @@
#include <atomic>
#include "absl/memory/memory.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gl_context.h"
#include "mediapipe/gpu/gpu_buffer_format.h"
@ -60,7 +61,11 @@ class GlTextureBuffer {
// provided later via glTexSubImage2D.
static std::unique_ptr<GlTextureBuffer> Create(int width, int height,
GpuBufferFormat format,
const void* data = nullptr);
const void* data = nullptr,
int alignment = 4);
// Create a texture with a copy of the data in image_frame.
static std::unique_ptr<GlTextureBuffer> Create(const ImageFrame& image_frame);
// Wraps an existing texture, but does not take ownership of it.
// deletion_callback is invoked when the GlTextureBuffer is released, so
@ -127,7 +132,7 @@ class GlTextureBuffer {
// If data is provided, it is uploaded to the texture; otherwise, it can be
// provided later via glTexSubImage2D.
// Returns true on success.
bool CreateInternal(const void* data = nullptr);
bool CreateInternal(const void* data, int alignment = 4);
friend class GlCalculatorHelperImpl;

View File

@ -51,8 +51,6 @@ namespace mediapipe {
constexpr int kMaxShaderInfoLength = 1024;
GLint GlhCompileShader(GLenum target, const GLchar* source, GLuint* shader) {
GLint status;
*shader = glCreateShader(target);
if (*shader == 0) {
return GL_FALSE;
@ -62,6 +60,11 @@ GLint GlhCompileShader(GLenum target, const GLchar* source, GLuint* shader) {
GL_DEBUG_LOG(Shader, *shader, "compile");
#if UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
return GL_TRUE;
#else
GLint status;
glGetShaderiv(*shader, GL_COMPILE_STATUS, &status);
LOG_IF(ERROR, status == GL_FALSE) << "Failed to compile shader:\n" << source;
@ -72,19 +75,24 @@ GLint GlhCompileShader(GLenum target, const GLchar* source, GLuint* shader) {
LOG(ERROR) << "Error message: " << std::string(cmessage, length);
}
return status;
#endif // UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
}
GLint GlhLinkProgram(GLuint program) {
GLint status;
glLinkProgram(program);
#if UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
return GL_TRUE;
#else
GLint status;
GL_DEBUG_LOG(Program, program, "link");
glGetProgramiv(program, GL_LINK_STATUS, &status);
LOG_IF(ERROR, status == GL_FALSE) << "Failed to link program " << program;
return status;
#endif // UNSAFE_EMSCRIPTEN_SKIP_GL_ERROR_HANDLING
}
GLint GlhValidateProgram(GLuint program) {

View File

@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"])
@ -24,8 +28,8 @@ cc_library(
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
"//mediapipe/modules/face_detection:face_detection_front_cpu",
"//mediapipe/modules/face_detection:face_detection_front_gpu",
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
],
)
@ -35,7 +39,7 @@ cc_library(
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_front_cpu",
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
],
)
@ -45,15 +49,10 @@ cc_library(
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_front_gpu",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
],
)
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
mediapipe_binary_graph(
name = "face_detection_mobile_cpu_binary_graph",
graph = "face_detection_mobile_cpu.pbtxt",
@ -67,3 +66,30 @@ mediapipe_binary_graph(
output_name = "face_detection_mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)
cc_library(
name = "face_detection_full_range_mobile_gpu_deps",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_full_range_gpu",
],
)
mediapipe_binary_graph(
name = "face_detection_full_range_mobile_gpu_binary_graph",
graph = "face_detection_full_range_mobile_gpu.pbtxt",
output_name = "face_detection_full_range_mobile_gpu.binarypb",
deps = [":face_detection_full_range_mobile_gpu_deps"],
)
cc_library(
name = "face_detection_full_range_desktop_live_deps",
deps = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/modules/face_detection:face_detection_full_range_cpu",
],
)

View File

@ -1,169 +0,0 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/desktop/face_detection:face_detection_cpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessarily computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on CPU to a 128x128 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio,
# resulting in potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:throttled_input_video"
output_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 192
output_height: 192
scale_mode: FIT
}
}
}
# Converts the transformed input image on CPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video_cpu"
output_stream: "TENSORS:image_tensor"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:image_tensor"
output_stream: "TENSORS:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/modules/face_detection/face_detection_back.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 1
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 192
input_size_width: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 4
aspect_ratios: 1.0
fixed_anchor_size: true
interpolated_scale_aspect_ratio: 0.0
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 1
num_boxes: 2304
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 192.0
y_scale: 192.0
h_scale: 192.0
w_scale: 192.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
return_empty_detections: true
}
}
}
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:output_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}

View File

@ -1,169 +0,0 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipie/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
# mediapipie/examples/ios/facedetectiongpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessarily computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on GPU to a 128x128 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio,
# resulting in potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 192
output_height: 192
scale_mode: FIT
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS_GPU:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/modules/face_detection/face_detection_back.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 1
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 192
input_size_width: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 4
aspect_ratios: 1.0
fixed_anchor_size: true
interpolated_scale_aspect_ratio: 0.0
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS_GPU:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 1
num_boxes: 2304
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 192.0
y_scale: 192.0
h_scale: 192.0
w_scale: 192.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
return_empty_detections: true
}
}
}
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:output_detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE_GPU:output_video"
}

View File

@ -31,7 +31,7 @@ node {
# Subgraph that detects faces.
node {
calculator: "FaceDetectionFrontCpu"
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:face_detections"
}

View File

@ -0,0 +1,60 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
# Used in the examples in
# mediapipe/examples/desktop/face_detection:face_detection_full_range_cpu.
# Images on CPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Detects faces.
node {
calculator: "FaceDetectionFullRangeCpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,60 @@
# MediaPipe graph that performs face detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/facedetectiongpu and
# mediapipe/examples/ios/facedetectiongpu.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Detects faces.
node {
calculator: "FaceDetectionFullRangeGpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:detections"
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
input_stream: "render_data"
output_stream: "IMAGE_GPU:output_video"
}

View File

@ -41,7 +41,7 @@ node: {
# Subgraph that detects faces.
node {
calculator: "FaceDetectionFrontCpu"
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:input_video_cpu"
output_stream: "DETECTIONS:face_detections"
}

View File

@ -31,7 +31,7 @@ node {
# Subgraph that detects faces.
node {
calculator: "FaceDetectionFrontGpu"
calculator: "FaceDetectionShortRangeGpu"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:face_detections"
}

View File

@ -39,7 +39,7 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/core:concatenate_detection_vector_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/modules/face_detection:face_detection_front_gpu",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
"//mediapipe/modules/face_geometry:face_geometry_from_detection",
],
)

View File

@ -24,7 +24,7 @@ output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Subgraph that detects faces and corresponding landmarks using the face
# detection pipeline.
node {
calculator: "FaceDetectionFrontGpu"
calculator: "FaceDetectionShortRangeGpu"
input_stream: "IMAGE:input_image"
output_stream: "DETECTIONS:multi_face_detection"
}

View File

@ -24,7 +24,7 @@ package(default_visibility = ["//visibility:public"])
cc_library(
name = "renderer_calculators",
deps = [
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",

View File

@ -30,7 +30,7 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:merge_calculator",
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",

View File

@ -26,7 +26,7 @@ cc_library(
deps = [
"//mediapipe/calculators/core:concatenate_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:concatenate_vector_calculator",
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",

View File

@ -26,7 +26,7 @@ mediapipe_simple_subgraph(
graph = "pose_renderer_gpu.pbtxt",
register_as = "PoseRendererGpu",
deps = [
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",
@ -40,7 +40,7 @@ mediapipe_simple_subgraph(
graph = "pose_renderer_cpu.pbtxt",
register_as = "PoseRendererCpu",
deps = [
"//mediapipe/calculators/core:split_normalized_landmark_list_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:landmarks_to_render_data_calculator",

View File

@ -91,15 +91,15 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
GLES20.glViewport(0, 0, width, height);
}
@Override
public void onDrawFrame(GL10 gl) {
/** Renders the frame. Note that the {@link #flush} method must be called afterwards. */
protected TextureFrame renderFrame() {
TextureFrame frame = nextFrame.getAndSet(null);
GLES20.glClear(GLES20.GL_COLOR_BUFFER_BIT);
ShaderUtil.checkGlError("glClear");
if (surfaceTexture == null && frame == null) {
return;
return null;
}
GLES20.glActiveTexture(GLES20.GL_TEXTURE0);
@ -161,14 +161,28 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer {
GLES20.glBindTexture(textureTarget, 0);
ShaderUtil.checkGlError("unbind surfaceTexture");
// We must flush before releasing the frame.
GLES20.glFlush();
return frame;
}
/**
* Calls {@link GLES20#glFlush()} and releases the texture frame. Should be invoked after the
* {@link #renderFrame} method is called.
*
* @param frame the {@link TextureFrame} to be released after {@link GLES20#glFlush()}.
*/
protected void flush(TextureFrame frame) {
GLES20.glFlush();
if (frame != null) {
frame.release();
}
}
@Override
public void onDrawFrame(GL10 gl) {
TextureFrame frame = renderFrame();
flush(frame);
}
public void setTextureTarget(int target) {
if (program != 0) {
throw new IllegalStateException(

View File

@ -16,7 +16,6 @@ package com.google.mediapipe.framework;
import android.graphics.Bitmap;
import java.nio.ByteBuffer;
import java.util.List;
// TODO: use Preconditions in this file.
/**

View File

@ -19,7 +19,7 @@ package com.google.mediapipe.framework;
* has reached the specified point in the sequence of commands it is executing. This can be
* necessary when working with multiple GL contexts.
*/
final class GraphGlSyncToken implements GlSyncToken {
public final class GraphGlSyncToken implements GlSyncToken {
private long token;
@Override
@ -44,7 +44,7 @@ final class GraphGlSyncToken implements GlSyncToken {
}
}
GraphGlSyncToken(long token) {
public GraphGlSyncToken(long token) {
this.token = token;
}

View File

@ -73,6 +73,7 @@ cc_library(
],
"//mediapipe/gpu:disable_gpu": [],
}),
features = ["-no_undefined"],
linkopts = select({
"//conditions:default": [],
"//mediapipe:android": [

View File

@ -583,9 +583,9 @@ absl::Status Graph::SetParentGlContext(int64 java_gl_context) {
#if MEDIAPIPE_DISABLE_GPU
LOG(FATAL) << "GPU support has been disabled in this build!";
#else
gpu_resources_ = mediapipe::GpuResources::Create(
reinterpret_cast<EGLContext>(java_gl_context))
.value();
ASSIGN_OR_RETURN(gpu_resources_,
mediapipe::GpuResources::Create(
reinterpret_cast<EGLContext>(java_gl_context)));
#endif // MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
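The change above replaces a crash-on-error .value() call with ASSIGN_OR_RETURN, so a failed GpuResources::Create now propagates as a status instead of aborting. Roughly, the macro expands to the explicit pattern sketched below (simplified, with a hypothetical stand-in factory):

#include <utility>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Stand-in for a fallible factory such as GpuResources::Create.
absl::StatusOr<int> MakeValue() { return 42; }

absl::Status UseValue() {
  auto value_or = MakeValue();
  if (!value_or.ok()) return value_or.status();  // early-return on failure
  int value = std::move(value_or).value();       // otherwise assign the value
  (void)value;  // ... use value ...
  return absl::OkStatus();
}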

View File

@ -46,6 +46,7 @@ load("@build_bazel_rules_android//android:rules.bzl", "android_binary", "android
def mediapipe_aar(
name,
srcs = [],
gen_libmediapipe = True,
calculators = [],
assets = [],
assets_dir = ""):
@ -54,12 +55,14 @@ def mediapipe_aar(
Args:
name: the name of the aar.
srcs: the additional java source code to be added into the android library.
gen_libmediapipe: whether to generate libmediapipe_jni.so. Defaults to True.
calculators: the calculator libraries to be compiled into the jni library.
assets: additional assets to be included into the archive.
assets_dir: path where the assets will be packaged.
"""
_mediapipe_jni(
name = name + "_jni",
gen_libmediapipe = gen_libmediapipe,
calculators = calculators,
)
@ -67,6 +70,22 @@ def mediapipe_aar(
name = name + "_proto",
)
native.genrule(
name = name + "_aar_manifest_generator",
outs = ["AndroidManifest.xml"],
cmd = """
cat > $(OUTS) <<EOF
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
</manifest>
EOF
""",
)
android_library(
name = name + "_android_lib",
srcs = srcs + [
@ -84,7 +103,6 @@ def mediapipe_aar(
proguard_specs = ["//mediapipe/java/com/google/mediapipe/framework:proguard.pgcfg"],
deps = [
":" + name + "_jni_cc_lib",
":" + name + "_jni_opencv_cc_lib",
"//mediapipe/framework:calculator_java_proto_lite",
"//mediapipe/framework:calculator_profile_java_proto_lite",
"//mediapipe/framework:calculator_options_java_proto_lite",
@ -94,6 +112,10 @@ def mediapipe_aar(
"//mediapipe/framework:status_handler_java_proto_lite",
"//mediapipe/framework:stream_handler_java_proto_lite",
"//mediapipe/framework/tool:calculator_graph_template_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:androidx_annotation",
"//third_party:androidx_appcompat",
"//third_party:androidx_core",
@ -108,7 +130,10 @@ def mediapipe_aar(
"@maven//:com_google_flogger_flogger_system_backend",
"@maven//:com_google_guava_guava",
"@maven//:androidx_lifecycle_lifecycle_common",
],
] + select({
"//conditions:default": [":" + name + "_jni_opencv_cc_lib"],
"//mediapipe/framework/port:disable_opencv": [],
}),
assets = assets,
assets_dir = assets_dir,
)
@ -121,22 +146,6 @@ def _mediapipe_proto(name):
Args:
name: the name of the target.
"""
native.genrule(
name = name + "_aar_manifest_generator",
outs = ["AndroidManifest.xml"],
cmd = """
cat > $(OUTS) <<EOF
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<application />
</manifest>
""",
)
_proto_java_src_generator(
name = "calculator_proto",
proto_src = "mediapipe/framework/calculator.proto",
@ -204,13 +213,15 @@ def _proto_java_src_generator(name, proto_src, java_lite_out, srcs = []):
],
)
def _mediapipe_jni(name, calculators = []):
def _mediapipe_jni(name, gen_libmediapipe, calculators = []):
"""Generates MediaPipe jni library.
Args:
name: the name of the target.
gen_libmediapipe: whether to generate libmediapipe_jni.so. Defaults to True.
calculators: the calculator libraries to be compiled into the jni library.
"""
if gen_libmediapipe:
native.cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,

View File

@ -22,6 +22,9 @@ android_library(
["*.java"],
exclude = [
"CameraInput.java",
"ResultGlRenderer.java",
"SolutionGlSurfaceView.java",
"SolutionGlSurfaceViewRenderer.java",
],
),
visibility = ["//visibility:public"],
@ -29,6 +32,7 @@ android_library(
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:autovalue",
"@com_google_protobuf//:protobuf_javalite",
"@maven//:com_google_code_findbugs_jsr305",
"@maven//:com_google_guava_guava",
],
@ -46,6 +50,23 @@ android_library(
],
)
android_library(
name = "solution_rendering",
srcs = [
"ResultGlRenderer.java",
"SolutionGlSurfaceView.java",
"SolutionGlSurfaceViewRenderer.java",
],
visibility = ["//visibility:public"],
deps = [
":solution_base",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"@maven//:com_google_guava_guava",
],
)
# Native dependencies of all MediaPipe solutions.
cc_binary(
name = "libmediapipe_jni.so",
@ -65,3 +86,11 @@ cc_library(
visibility = ["//visibility:public"],
alwayslink = 1,
)
load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar")
mediapipe_aar(
name = "solution_core",
srcs = glob(["*.java"]),
gen_libmediapipe = False,
)

View File

@ -39,6 +39,9 @@ public class ImageSolutionResult implements SolutionResult {
// Returns the corresponding input image as a {@link Bitmap}.
public Bitmap inputBitmap() {
if (imagePacket == null) {
return null;
}
if (cachedBitmap != null) {
return cachedBitmap;
}
@ -49,6 +52,9 @@ public class ImageSolutionResult implements SolutionResult {
// Returns the corresponding input image as a {@link TextureFrame}. The caller must release the
// acquired {@link TextureFrame} after using.
public TextureFrame acquireTextureFrame() {
if (imagePacket == null) {
return null;
}
return PacketGetter.getTextureFrame(imagePacket);
}

View File

@ -0,0 +1,25 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutionbase;
/** Interface for the customizable MediaPipe solution result OpenGL renderer. */
public interface ResultGlRenderer<T extends ImageSolutionResult> {
/** Sets up OpenGL rendering when the surface is created or recreated. */
void setupRendering();
/** Renders the solution result. */
void renderResult(T result);
}

View File

@ -45,9 +45,7 @@ public class SolutionBase {
protected final AtomicBoolean solutionGraphStarted = new AtomicBoolean(false);
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
}
/**

View File

@ -0,0 +1,118 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutionbase;
import android.content.Context;
import android.opengl.GLES20;
import android.opengl.GLSurfaceView;
import android.util.Log;
import android.view.SurfaceHolder;
import android.view.View;
import com.google.mediapipe.glutil.EglManager;
import javax.microedition.khronos.egl.EGL10;
import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.egl.EGLContext;
import javax.microedition.khronos.egl.EGLDisplay;
/**
* A simplified GlSurfaceView implementation for displaying MediaPipe Solution results.
*
* <p>Users need to provide a custom {@link ResultGlRenderer} via {@link
* #setSolutionResultRenderer(ResultGlRenderer)} for rendering MediaPipe solution results. Set the
* latest render data by calling {@link #setRenderData(ImageSolutionResult)} before invoking {@link
* #requestRender}. By default, the solution renderer renders the input images. Call {@link
* #setRenderInputImage(boolean)} to explicitly set whether the input images should be rendered or
* not.
*/
public class SolutionGlSurfaceView<T extends ImageSolutionResult> extends GLSurfaceView {
private static final String TAG = "SolutionGlSurfaceView";
SolutionGlSurfaceViewRenderer<T> renderer = new SolutionGlSurfaceViewRenderer<>();
/**
* Sets a user-defined {@link ResultGlRenderer} for rendering MediaPipe solution results.
*
* @param resultRenderer a {@link ResultGlRenderer}.
*/
public void setSolutionResultRenderer(ResultGlRenderer<T> resultRenderer) {
renderer.setSolutionResultRenderer(resultRenderer);
}
/**
* Sets the next texture frame and solution result to render.
*
* @param solutionResult a solution result object that contains the solution outputs and a
* texture frame.
*/
public void setRenderData(T solutionResult) {
renderer.setRenderData(solutionResult);
}
/** Sets whether the input image should be rendered. Defaults to true. */
public void setRenderInputImage(boolean renderInputImage) {
renderer.setRenderInputImage(renderInputImage);
}
/** Initializes SolutionGlSurfaceView with Android context, gl context, and gl version number. */
public SolutionGlSurfaceView(Context context, EGLContext glContext, int glMajorVersion) {
super(context);
setEGLContextClientVersion(glMajorVersion);
getHolder().addCallback(new HolderCallbacks());
setEGLContextFactory(
new GLSurfaceView.EGLContextFactory() {
@Override
public EGLContext createContext(EGL10 egl, EGLDisplay display, EGLConfig eglConfig) {
int[] contextAttrs = {
EglManager.EGL_CONTEXT_CLIENT_VERSION, glMajorVersion, EGL10.EGL_NONE
};
return egl.eglCreateContext(display, eglConfig, glContext, contextAttrs);
}
@Override
public void destroyContext(EGL10 egl, EGLDisplay display, EGLContext context) {
if (!egl.eglDestroyContext(display, context)) {
throw new RuntimeException("eglDestroyContext failed");
}
}
});
renderer.setTextureTarget(GLES20.GL_TEXTURE_2D);
super.setRenderer(renderer);
setRenderMode(GLSurfaceView.RENDERMODE_WHEN_DIRTY);
setVisibility(View.GONE);
}
private class HolderCallbacks implements SurfaceHolder.Callback {
@Override
public void surfaceCreated(SurfaceHolder holder) {
Log.d(TAG, "main surfaceCreated");
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
Log.d(
TAG,
String.format(
"main surfaceChanged. width: %d height: %d glViewWidth: %d glViewHeight: %d",
width,
height,
SolutionGlSurfaceView.this.getWidth(),
SolutionGlSurfaceView.this.getHeight()));
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
Log.d(TAG, "main surfaceDestroyed");
}
}
}
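As a usage sketch (not code from this commit), the view is typically wired to a solution as follows. LoggingHandsRenderer is the illustrative renderer from the earlier sketch, the Hands class appears later in this change, the lambda assumes ResultListener is a single-method callback, and the caller is assumed to have constructed the view with an appropriate EGL context and GL version.

import android.view.View;
import com.google.mediapipe.solutionbase.SolutionGlSurfaceView;
import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsResult;

/** Sketch: connecting a Hands solution instance to an already-constructed SolutionGlSurfaceView. */
final class HandsRenderingSetup {
  private HandsRenderingSetup() {}

  static void connect(Hands hands, SolutionGlSurfaceView<HandsResult> glSurfaceView) {
    glSurfaceView.setSolutionResultRenderer(new LoggingHandsRenderer());
    glSurfaceView.setRenderInputImage(true);
    hands.setResultListener(
        handsResult -> {
          // Hand the latest result to the renderer, then schedule a redraw (RENDERMODE_WHEN_DIRTY).
          glSurfaceView.setRenderData(handsResult);
          glSurfaceView.requestRender();
        });
    // The view is created with visibility GONE (see the constructor above); show it once wired up.
    glSurfaceView.post(() -> glSurfaceView.setVisibility(View.VISIBLE));
  }
}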


@ -0,0 +1,83 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutionbase;
import android.graphics.SurfaceTexture;
import com.google.mediapipe.components.GlSurfaceViewRenderer;
import com.google.mediapipe.framework.TextureFrame;
import java.util.concurrent.atomic.AtomicReference;
import javax.microedition.khronos.egl.EGLConfig;
import javax.microedition.khronos.opengles.GL10;
/**
* MediaPipe Solution's GlSurfaceViewRenderer.
*
* <p>Users can provide a custom {@link ResultGlRenderer} for rendering MediaPipe solution results.
* For setting the latest solution result, call {@link #setRenderData(ImageSolutionResult)}. By
* default, the renderer renders the input images. Call {@link #setRenderInputImage(boolean)} to
* explicitly set whether the input images should be rendered or not.
*/
public class SolutionGlSurfaceViewRenderer<T extends ImageSolutionResult>
extends GlSurfaceViewRenderer {
private static final String TAG = "SolutionGlSurfaceViewRenderer";
private boolean renderInputImage = true;
private final AtomicReference<T> nextSolutionResult = new AtomicReference<>();
private ResultGlRenderer<T> resultGlRenderer;
/** Sets whether the input image should be rendered. Defaults to true. */
public void setRenderInputImage(boolean renderInputImage) {
this.renderInputImage = renderInputImage;
}
/** Sets a user-defined {@link ResultGlRenderer} for rendering MediaPipe solution results. */
public void setSolutionResultRenderer(ResultGlRenderer<T> resultGlRenderer) {
this.resultGlRenderer = resultGlRenderer;
}
/**
* Sets the next textureframe and solution result to render.
*
* @param solutionResult a solution result object that contains the solution outputs and a
* textureframe.
*/
public void setRenderData(T solutionResult) {
setNextFrame(solutionResult.acquireTextureFrame());
nextSolutionResult.getAndSet(solutionResult);
}
@Override
public void onSurfaceCreated(GL10 gl, EGLConfig config) {
super.onSurfaceCreated(gl, config);
resultGlRenderer.setupRendering();
}
@Override
public void onDrawFrame(GL10 gl) {
TextureFrame frame = null;
if (renderInputImage) {
frame = renderFrame();
}
// nextSolutionResult itself is never null; check whether a pending result is available instead.
T solutionResult = nextSolutionResult.getAndSet(null);
if (solutionResult != null) {
resultGlRenderer.renderResult(solutionResult);
}
flush(frame);
}
@Override
public void setSurfaceTexture(SurfaceTexture texture) {
throw new IllegalStateException("SurfaceTexture should not be used in MediaPipe Solution.");
}
}


@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.solutions.hands">
<uses-sdk android:minSdkVersion="21"
android:targetSdkVersion="27" />
</manifest>


@ -0,0 +1,45 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
android_library(
name = "hands",
srcs = [
"HandLandmark.java",
"Hands.java",
"HandsOptions.java",
"HandsResult.java",
],
assets = [
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/hand_landmark:hand_landmark.tflite",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
],
assets_dir = "",
javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],
manifest = ":AndroidManifest.xml",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework/formats:classification_java_proto_lite",
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/solutionbase:solution_base",
"//third_party:autovalue",
"@maven//:androidx_annotation_annotation",
"@maven//:com_google_code_findbugs_jsr305",
"@maven//:com_google_guava_guava",
],
)


@ -0,0 +1,72 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.hands;
import androidx.annotation.IntDef;
/** The 21 hand landmarks. */
public final class HandLandmark {
public static final int NUM_LANDMARKS = 21;
public static final int WRIST = 0;
public static final int THUMB_CMC = 1;
public static final int THUMB_MCP = 2;
public static final int THUMB_DIP = 3;
public static final int THUMB_TIP = 4;
public static final int INDEX_FINGER_MCP = 5;
public static final int INDEX_FINGER_PIP = 6;
public static final int INDEX_FINGER_DIP = 7;
public static final int INDEX_FINGER_TIP = 8;
public static final int MIDDLE_FINGER_MCP = 9;
public static final int MIDDLE_FINGER_PIP = 10;
public static final int MIDDLE_FINGER_DIP = 11;
public static final int MIDDLE_FINGER_TIP = 12;
public static final int RING_FINGER_MCP = 13;
public static final int RING_FINGER_PIP = 14;
public static final int RING_FINGER_DIP = 15;
public static final int RING_FINGER_TIP = 16;
public static final int PINKY_MCP = 17;
public static final int PINKY_PIP = 18;
public static final int PINKY_DIP = 19;
public static final int PINKY_TIP = 20;
/** Represents a hand landmark type. */
@IntDef({
WRIST,
THUMB_CMC,
THUMB_MCP,
THUMB_DIP,
THUMB_TIP,
INDEX_FINGER_MCP,
INDEX_FINGER_PIP,
INDEX_FINGER_DIP,
INDEX_FINGER_TIP,
MIDDLE_FINGER_MCP,
MIDDLE_FINGER_PIP,
MIDDLE_FINGER_DIP,
MIDDLE_FINGER_TIP,
RING_FINGER_MCP,
RING_FINGER_PIP,
RING_FINGER_DIP,
RING_FINGER_TIP,
PINKY_MCP,
PINKY_PIP,
PINKY_DIP,
PINKY_TIP,
})
public @interface HandLandmarkType {}
private HandLandmark() {}
}
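Because these constants are 0-based positions in the 21-element landmark list, they can be used directly as indices into a NormalizedLandmarkList. A small sketch (the helper class is illustrative, not part of the commit):

import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.solutions.hands.HandLandmark;

/** Sketch: reading a single landmark by its HandLandmarkType index. */
final class LandmarkLookup {
  private LandmarkLookup() {}

  /** Returns the normalized index-finger-tip position of the given hand, e.g. for gesture logic. */
  static NormalizedLandmark indexFingerTip(NormalizedLandmarkList hand) {
    // The HandLandmark constants are 0-based positions in the 21-element landmark list.
    return hand.getLandmark(HandLandmark.INDEX_FINGER_TIP);
  }
}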


@ -0,0 +1,132 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.hands;
import android.content.Context;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutionbase.ErrorListener;
import com.google.mediapipe.solutionbase.ImageSolutionBase;
import com.google.mediapipe.solutionbase.OutputHandler;
import com.google.mediapipe.solutionbase.ResultListener;
import com.google.mediapipe.solutionbase.SolutionInfo;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
/**
* MediaPipe Hands Solution API.
*
* <p>MediaPipe Hands processes a {@link TextureFrame} or a {@link Bitmap} and returns the hand
* landmarks and handedness (left vs. right hand) of each detected hand. Please refer to
* https://solutions.mediapipe.dev/hands#android-solution-api for usage examples.
*/
public class Hands extends ImageSolutionBase {
private static final String TAG = "Hands";
private static final String NUM_HANDS = "num_hands";
private static final String SOLUTION_GRAPH_NAME = "hand_landmark_tracking_gpu_image.binarypb";
private static final String IMAGE_INPUT_STREAM = "image";
private static final ImmutableList<String> OUTPUT_STREAMS =
ImmutableList.of("multi_hand_landmarks", "multi_handedness", "image");
private static final int LANDMARKS_INDEX = 0;
private static final int HANDEDNESS_INDEX = 1;
private static final int INPUT_IMAGE_INDEX = 2;
private final OutputHandler<HandsResult> graphOutputHandler;
/**
* Initializes MediaPipe Hands solution.
*
* @param context an Android {@link Context}.
* @param options the configuration options defined in {@link HandsOptions}.
*/
public Hands(Context context, HandsOptions options) {
graphOutputHandler = new OutputHandler<>();
graphOutputHandler.setOutputConverter(
packets -> {
HandsResult.Builder handsResultBuilder = HandsResult.builder();
try {
handsResultBuilder.setMultiHandLandmarks(
getProtoVector(packets.get(LANDMARKS_INDEX), NormalizedLandmarkList.parser()));
} catch (MediaPipeException e) {
throwException("Error occurs when getting MediaPipe hand landmarks. ", e);
}
try {
handsResultBuilder.setMultiHandedness(
getProtoVector(packets.get(HANDEDNESS_INDEX), Classification.parser()));
} catch (MediaPipeException e) {
throwException("Error occurs when getting MediaPipe handedness data. ", e);
}
return handsResultBuilder
.setImagePacket(packets.get(INPUT_IMAGE_INDEX))
.setTimestamp(
staticImageMode ? Long.MIN_VALUE : packets.get(INPUT_IMAGE_INDEX).getTimestamp())
.build();
});
SolutionInfo solutionInfo =
SolutionInfo.builder()
.setBinaryGraphPath(SOLUTION_GRAPH_NAME)
.setImageInputStreamName(IMAGE_INPUT_STREAM)
.setOutputStreamNames(OUTPUT_STREAMS)
.setStaticImageMode(options.mode() == HandsOptions.STATIC_IMAGE_MODE)
.build();
initialize(context, solutionInfo, graphOutputHandler);
Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put(NUM_HANDS, packetCreator.createInt32(options.maxNumHands()));
start(inputSidePackets);
}
/**
* Sets a callback to be invoked when the HandsResults become available.
*
* @param listener the {@link ResultListener} callback.
*/
public void setResultListener(ResultListener<HandsResult> listener) {
this.graphOutputHandler.setResultListener(listener);
}
/**
* Sets a callback to be invoked when the Hands solution throws errors.
*
* @param listener the {@link ErrorListener} callback.
*/
public void setErrorListener(@Nullable ErrorListener listener) {
this.graphOutputHandler.setErrorListener(listener);
this.errorListener = listener;
}
/**
* Gets a specific hand landmark by hand index and hand landmark type.
*
* @param result the returned {@link HandsResult} object.
* @param handIndex the hand index. The hand landmark lists are sorted by the confidence score.
* @param landmarkType the hand landmark type defined in {@link HandLandmark}.
*/
public static NormalizedLandmark getHandLandmark(
HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
if (result == null
|| handIndex >= result.multiHandLandmarks().size()
|| landmarkType >= HandLandmark.NUM_LANDMARKS) {
return NormalizedLandmark.getDefaultInstance();
}
return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
}
}
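A hedged usage sketch for this class, not taken from the commit: it assumes ResultListener and ErrorListener are single-method callbacks (taking a result, and a message plus exception, respectively), and that a HandsOptions instance is supplied by the caller. Sending camera frames into the solution goes through ImageSolutionBase methods that are not part of this hunk.

import android.content.Context;
import android.util.Log;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.hands.HandLandmark;
import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsOptions;

/** Sketch: creating the Hands solution and reading one landmark from each result. */
final class HandsSetup {
  private static final String TAG = "HandsSetup";

  private HandsSetup() {}

  static Hands createHands(Context context, HandsOptions options) {
    Hands hands = new Hands(context, options);
    hands.setResultListener(
        result -> {
          // Wrist of the first hand; getHandLandmark returns a default instance when no hand
          // is detected, so no null check is needed here.
          NormalizedLandmark wrist = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
          Log.d(TAG, String.format("Wrist: x=%f y=%f", wrist.getX(), wrist.getY()));
        });
    hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error: " + message, e));
    return hands;
  }
}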


@ -0,0 +1,77 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.hands;
import androidx.annotation.IntDef;
import com.google.auto.value.AutoValue;
/**
* MediaPipe Hands solution-specific options.
*
* <p>mode: Whether to treat the input images as a batch of static and possibly unrelated images, or
* a video stream. See details in https://solutions.mediapipe.dev/hands#static_image_mode.
*
* <p>maxNumHands: Maximum number of hands to detect. See details in
* https://solutions.mediapipe.dev/hands#max_num_hands.
*
* <p>minDetectionConfidence: Minimum confidence value ([0.0, 1.0]) for hand detection to be
* considered successful. See details in
* https://solutions.mediapipe.dev/hands#min_detection_confidence.
*
* <p>minTrackingConfidence: Minimum confidence value ([0.0, 1.0]) for the hand landmarks to be
* considered tracked successfully. See details in
* https://solutions.mediapipe.dev/hands#min_tracking_confidence.
*/
@AutoValue
public abstract class HandsOptions {
// TODO: Switch to use boolean variable.
public static final int STREAMING_MODE = 1;
public static final int STATIC_IMAGE_MODE = 2;
/**
* Indicates whether to treat the input images as a batch of static and possibly unrelated images,
* or a video stream.
*/
@IntDef({STREAMING_MODE, STATIC_IMAGE_MODE})
public @interface Mode {}
@Mode
public abstract int mode();
public abstract int maxNumHands();
public abstract float minDetectionConfidence();
public abstract float minTrackingConfidence();
public static Builder builder() {
return new AutoValue_HandsOptions.Builder();
}
/** Builder for {@link HandsOptions}. */
@AutoValue.Builder
public abstract static class Builder {
public abstract Builder setMode(int value);
public abstract Builder setMaxNumHands(int value);
public abstract Builder setMinDetectionConfidence(float value);
public abstract Builder setMinTrackingConfidence(float value);
public abstract HandsOptions build();
}
}
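For illustration, the builder might be used as follows for live video input; the helper class and the 0.5 thresholds are just examples, not values mandated by the commit.

import com.google.mediapipe.solutions.hands.HandsOptions;

/** Sketch: configuring Hands for a live camera stream. */
final class HandsOptionsExample {
  private HandsOptionsExample() {}

  static HandsOptions streamingOptions() {
    return HandsOptions.builder()
        .setMode(HandsOptions.STREAMING_MODE) // video stream, not unrelated static images
        .setMaxNumHands(2) // track up to two hands
        .setMinDetectionConfidence(0.5f) // hand detection threshold in [0.0, 1.0]
        .setMinTrackingConfidence(0.5f) // landmark tracking threshold in [0.0, 1.0]
        .build();
  }
}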


@ -0,0 +1,81 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.hands;
import android.graphics.Bitmap;
import com.google.auto.value.AutoBuilder;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.solutionbase.ImageSolutionResult;
import java.util.List;
/**
* HandsResult contains a collection of detected/tracked hands, a collection of handedness of the
* detected/tracked hands, and the input {@link Bitmap} or {@link TextureFrame}. If not in static
* image mode, the timestamp field will be set to the timestamp of the corresponding input image.
*/
public class HandsResult extends ImageSolutionResult {
private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks;
private final ImmutableList<Classification> multiHandedness;
HandsResult(
ImmutableList<NormalizedLandmarkList> multiHandLandmarks,
ImmutableList<Classification> multiHandedness,
Packet imagePacket,
long timestamp) {
this.multiHandLandmarks = multiHandLandmarks;
this.multiHandedness = multiHandedness;
this.timestamp = timestamp;
this.imagePacket = imagePacket;
}
/**
* Collection of detected/tracked hands, where each hand is represented as a list of 21 hand
* landmarks and each landmark is composed of x, y and z. x and y are normalized to [0.0, 1.0] by
* the image width and height respectively. z represents the landmark depth with the depth at the
* wrist being the origin, and the smaller the value the closer the landmark is to the camera. The
* magnitude of z uses roughly the same scale as x.
*/
public ImmutableList<NormalizedLandmarkList> multiHandLandmarks() {
return multiHandLandmarks;
}
/**
* Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each
* hand is composed of label and score. label is a string of value either "Left" or "Right". score
* is the estimated probability of the predicted handedness and is always greater than or equal to
* 0.5 (and the opposite handedness has an estimated probability of 1 - score).
*/
public ImmutableList<Classification> multiHandedness() {
return multiHandedness;
}
public static Builder builder() {
return new AutoBuilder_HandsResult_Builder();
}
/** Builder for {@link HandsResult}. */
@AutoBuilder
public abstract static class Builder {
abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value);
abstract Builder setMultiHandedness(List<Classification> value);
abstract Builder setTimestamp(long value);
abstract Builder setImagePacket(Packet value);
abstract HandsResult build();
}
}
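To make the handedness fields concrete, a small sketch that logs the predicted label and score of each detected hand; it assumes, as the comments above suggest, that multiHandedness() is index-aligned with multiHandLandmarks(). The helper class is illustrative, not part of the commit.

import android.util.Log;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.solutions.hands.HandsResult;

/** Sketch: logging the handedness of every detected hand in a HandsResult. */
final class HandednessLogger {
  private static final String TAG = "HandednessLogger";

  private HandednessLogger() {}

  static void log(HandsResult result) {
    for (int i = 0; i < result.multiHandedness().size(); i++) {
      Classification handedness = result.multiHandedness().get(i);
      // label is "Left" or "Right"; score is the probability of that label (always >= 0.5).
      Log.d(
          TAG,
          String.format(
              "Hand %d: %s (score %.2f)", i, handedness.getLabel(), handedness.getScore()));
    }
  }
}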


@ -22,11 +22,11 @@ licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "face_detection_front_by_roi_cpu",
graph = "face_detection_front_by_roi_cpu.pbtxt",
register_as = "FaceDetectionFrontByRoiCpu",
name = "face_detection_short_range_by_roi_cpu",
graph = "face_detection_short_range_by_roi_cpu.pbtxt",
register_as = "FaceDetectionShortRangeByRoiCpu",
deps = [
":face_detection_front_common",
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
@ -34,11 +34,11 @@ mediapipe_simple_subgraph(
)
mediapipe_simple_subgraph(
name = "face_detection_front_by_roi_gpu",
graph = "face_detection_front_by_roi_gpu.pbtxt",
register_as = "FaceDetectionFrontByRoiGpu",
name = "face_detection_short_range_by_roi_gpu",
graph = "face_detection_short_range_by_roi_gpu.pbtxt",
register_as = "FaceDetectionShortRangeByRoiGpu",
deps = [
":face_detection_front_common",
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
@ -46,11 +46,11 @@ mediapipe_simple_subgraph(
)
mediapipe_simple_subgraph(
name = "face_detection_front_cpu",
graph = "face_detection_front_cpu.pbtxt",
register_as = "FaceDetectionFrontCpu",
name = "face_detection_short_range_cpu",
graph = "face_detection_short_range_cpu.pbtxt",
register_as = "FaceDetectionShortRangeCpu",
deps = [
":face_detection_front_common",
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
@ -58,11 +58,11 @@ mediapipe_simple_subgraph(
)
mediapipe_simple_subgraph(
name = "face_detection_front_gpu",
graph = "face_detection_front_gpu.pbtxt",
register_as = "FaceDetectionFrontGpu",
name = "face_detection_short_range_gpu",
graph = "face_detection_short_range_gpu.pbtxt",
register_as = "FaceDetectionShortRangeGpu",
deps = [
":face_detection_front_common",
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
@ -70,9 +70,45 @@ mediapipe_simple_subgraph(
)
mediapipe_simple_subgraph(
name = "face_detection_front_common",
graph = "face_detection_front_common.pbtxt",
register_as = "FaceDetectionFrontCommon",
name = "face_detection_short_range_common",
graph = "face_detection_short_range_common.pbtxt",
register_as = "FaceDetectionShortRangeCommon",
deps = [
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/util:detection_projection_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_cpu",
graph = "face_detection_full_range_cpu.pbtxt",
register_as = "FaceDetectionFullRangeCpu",
deps = [
":face_detection_full_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_gpu",
graph = "face_detection_full_range_gpu.pbtxt",
register_as = "FaceDetectionFullRangeGpu",
deps = [
":face_detection_full_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_common",
graph = "face_detection_full_range_common.pbtxt",
register_as = "FaceDetectionFullRangeCommon",
deps = [
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
@ -83,8 +119,8 @@ mediapipe_simple_subgraph(
exports_files(
srcs = [
"face_detection_back.tflite",
"face_detection_back_sparse.tflite",
"face_detection_front.tflite",
"face_detection_full_range.tflite",
"face_detection_full_range_sparse.tflite",
"face_detection_short_range.tflite",
],
)


@ -2,6 +2,7 @@
Subgraphs|Details
:--- | :---
[`FaceDetectionFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (CPU input, and inference is executed on CPU.)
[`FaceDetectionFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (GPU input, and inference is executed on GPU.)
[`FaceDetectionFullRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (CPU input, and inference is executed on CPU.)
[`FaceDetectionFullRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (GPU input, and inference is executed on GPU.)
[`FaceDetectionShortRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (CPU input, and inference is executed on CPU.)
[`FaceDetectionShortRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (GPU input, and inference is executed on GPU.)


@ -0,0 +1,102 @@
# MediaPipe graph performing common processing to detect faces using
# face_detection_full_range_sparse.tflite model, currently consisting of tensor
# post processing.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFullRangeCommon"
# input_stream: "TENSORS:detection_tensors"
# input_stream: "MATRIX:transform_matrix"
# output_stream: "DETECTIONS:detections"
# }
type: "FaceDetectionShortRangeCommon"
# Detection tensors. (std::vector<Tensor>)
input_stream: "TENSORS:detection_tensors"
# A 4x4 row-major-order matrix that maps a point represented in the detection
# tensors to a desired coordinate system, e.g., in the original input image
# before scaling/cropping. (std::array<float, 16>)
input_stream: "MATRIX:transform_matrix"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 1
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 192
input_size_width: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 4
aspect_ratios: 1.0
fixed_anchor_size: true
interpolated_scale_aspect_ratio: 0.0
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 2304
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 192.0
y_scale: 192.0
h_scale: 192.0
w_scale: 192.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
# Projects the detections from input tensor to the corresponding locations on
# the original image (input to the graph).
node {
calculator: "DetectionProjectionCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "PROJECTION_MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}


@ -0,0 +1,79 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFullRangeCpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionFullRangeCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
delegate { xnnpack {} }
}
#
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionFullRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}


@ -0,0 +1,80 @@
# MediaPipe graph to detect faces. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFullRangeGpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionFullRangeGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
gpu_origin: TOP_LEFT
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
#
delegate: { gpu { use_advanced_gpu_api: true } }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionFullRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

Some files were not shown because too many files have changed in this diff.