Project import generated by Copybara.

GitOrigin-RevId: 373e3ac1e5839befd95bf7d73ceff3c5f1171969
MediaPipe Team 2021-10-06 13:44:33 -07:00 committed by jqtang
parent 137e1cc763
commit 33d683c671
153 changed files with 7874 additions and 1352 deletions


@@ -16,11 +16,11 @@ bazel_skylib_workspace()
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "3.7.2")
-# ABSL cpp library lts_2020_09_23
+# ABSL cpp library lts_2021_03_24, patch 2.
http_archive(
name = "com_google_absl",
urls = [
-"https://github.com/abseil/abseil-cpp/archive/20200923.tar.gz",
+"https://github.com/abseil/abseil-cpp/archive/refs/tags/20210324.2.tar.gz",
],
# Remove after https://github.com/abseil/abseil-cpp/issues/326 is solved.
patches = [
@@ -29,8 +29,8 @@ http_archive(
patch_args = [
"-p1",
],
-strip_prefix = "abseil-cpp-20200923",
-sha256 = "b3744a4f7a249d5eaf2309daad597631ce77ea62e0fc6abffbab4b4c3dc0fc08"
+strip_prefix = "abseil-cpp-20210324.2",
+sha256 = "59b862f50e710277f8ede96f083a5bb8d7c9595376146838b9580be90374ee1f"
)
http_archive(
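As a side note, a minimal Python sketch (not part of this change) for verifying that the pinned Abseil archive matches the sha256 above before editing WORKSPACE; it assumes network access to github.com:

```python
import hashlib
import urllib.request

URL = "https://github.com/abseil/abseil-cpp/archive/refs/tags/20210324.2.tar.gz"
EXPECTED_SHA256 = "59b862f50e710277f8ede96f083a5bb8d7c9595376146838b9580be90374ee1f"

# Download the release tarball and hash it in one pass.
with urllib.request.urlopen(URL) as resp:
  digest = hashlib.sha256(resp.read()).hexdigest()

# The digest must match the sha256 pinned in the WORKSPACE rule above.
assert digest == EXPECTED_SHA256, f"checksum mismatch: {digest}"
print("abseil-cpp 20210324.2 checksum OK")
```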
@@ -333,6 +333,7 @@ maven_install(
"androidx.concurrent:concurrent-futures:1.0.0-alpha03",
"androidx.lifecycle:lifecycle-common:2.3.1",
"androidx.activity:activity:1.2.2",
+"androidx.exifinterface:exifinterface:1.3.3",
"androidx.fragment:fragment:1.3.4",
"androidx.annotation:annotation:aar:1.1.0",
"androidx.appcompat:appcompat:aar:1.1.0-rc01",
@@ -349,8 +350,8 @@ maven_install(
"com.google.auto.value:auto-value:1.8.1",
"com.google.auto.value:auto-value-annotations:1.8.1",
"com.google.code.findbugs:jsr305:latest.release",
-"com.google.flogger:flogger-system-backend:latest.release",
-"com.google.flogger:flogger:latest.release",
+"com.google.flogger:flogger-system-backend:0.6",
+"com.google.flogger:flogger:0.6",
"com.google.guava:guava:27.0.1-android",
"com.google.guava:listenablefuture:1.0",
"junit:junit:4.12",
@@ -389,6 +390,8 @@ http_archive(
patches = [
"@//third_party:org_tensorflow_compatibility_fixes.diff",
"@//third_party:org_tensorflow_objc_cxx17.diff",
+# Diff is generated with a script, don't update it manually.
+"@//third_party:org_tensorflow_custom_ops.diff",
],
patch_args = [
"-p1",


@@ -26,15 +26,17 @@ the following into the project's Gradle dependencies:
```
dependencies {
-// MediaPipe solution-core is the foundation of any MediaPipe solutions.
+// MediaPipe solution-core is the foundation of any MediaPipe Solutions.
implementation 'com.google.mediapipe:solution-core:latest.release'
-// Optional: MediaPipe Hands solution.
-implementation 'com.google.mediapipe:hands:latest.release'
-// Optional: MediaPipe FaceMesh solution.
+// Optional: MediaPipe Face Detection Solution.
+implementation 'com.google.mediapipe:facedetection:latest.release'
+// Optional: MediaPipe Face Mesh Solution.
implementation 'com.google.mediapipe:facemesh:latest.release'
+// Optional: MediaPipe Hands Solution.
+implementation 'com.google.mediapipe:hands:latest.release'
// MediaPipe deps
-implementation 'com.google.flogger:flogger:latest.release'
-implementation 'com.google.flogger:flogger-system-backend:latest.release'
+implementation 'com.google.flogger:flogger:0.6'
+implementation 'com.google.flogger:flogger-system-backend:0.6'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library
@@ -45,7 +47,7 @@ dependencies {
}
```
-See the detailed solutions API usage examples for different use cases in the
+See the detailed solution APIs usage examples for different use cases in the
solution example apps'
[source code](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions).
If the prebuilt maven packages are not sufficient, building the MediaPipe


@@ -103,7 +103,7 @@ monotonically increasing timestamps. By convention, realtime calculators and
graphs use the recording time or the presentation time as the timestamp for each
packet, with each timestamp representing microseconds since
`Jan/1/1970:00:00:00`. This allows packets from various sources to be processed
-in a gloablly consistent order.
+in a globally consistent order.
Normally for offline processing, every input packet is processed and processing
continues as long as necessary. For online processing, it is often necessary to
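As a minimal illustration of this convention (a sketch, not taken from the MediaPipe sources), a real-time producer can derive monotonically increasing packet timestamps from wall-clock time in microseconds:

```python
import time

_prev_us = 0

def next_packet_timestamp_us() -> int:
  """Microseconds since Jan/1/1970:00:00:00, forced to be strictly increasing."""
  global _prev_us
  _prev_us = max(_prev_us + 1, int(time.time() * 1_000_000))
  return _prev_us
```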

(New binary image file added, 797 KiB; not shown.)


@@ -121,12 +121,10 @@ with mp_face_detection.FaceDetection(
# If loading a video, use 'break' instead of 'continue'.
continue
-# Flip the image horizontally for a later selfie-view display, and convert
-# the BGR image to RGB.
-image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = face_detection.process(image)
# Draw the face detection annotations on the image.
@@ -135,7 +133,8 @@ with mp_face_detection.FaceDetection(
if results.detections:
for detection in results.detections:
mp_drawing.draw_detection(image, detection)
-cv2.imshow('MediaPipe Face Detection', image)
+# Flip the image horizontally for a selfie-view display.
+cv2.imshow('MediaPipe Face Detection', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
@@ -200,7 +199,7 @@ const faceDetection = new FaceDetection({locateFile: (file) => {
return `https://cdn.jsdelivr.net/npm/@mediapipe/face_detection@0.0/${file}`;
}});
faceDetection.setOptions({
-modelSelection: 0
+modelSelection: 0,
minDetectionConfidence: 0.5
});
faceDetection.onResults(onResults);
@@ -216,6 +215,194 @@ camera.start();
</script>
```
### Android Solution API
Please first follow general
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
to add MediaPipe Gradle dependencies, then try the Face Detection Solution API
in the companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facedetection)
following
[these instructions](../getting_started/android_solutions.md#build-solution-example-apps-in-android-studio)
and learn more in the usage example below.
Supported configuration options:
* [staticImageMode](#static_image_mode)
* [modelSelection](#model_selection)
#### Camera Input
```java
// For camera input and result rendering with OpenGL.
FaceDetectionOptions faceDetectionOptions =
FaceDetectionOptions.builder()
.setStaticImageMode(false)
.setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// Initializes a new CameraInput instance and connects it to MediaPipe Face Detection Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> faceDetection.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceDetectionResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(faceDetectionResult, 0, FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseTip.getX(), noseTip.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceDetectionResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the GLSurfaceView is attached.
glSurfaceView.post(
() ->
cameraInput.start(
this,
faceDetection.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
```
#### Image Input
```java
// For reading images from gallery and drawing the output in an ImageView.
FaceDetectionOptions faceDetectionOptions =
FaceDetectionOptions.builder()
.setStaticImageMode(true)
.setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
// Connects MediaPipe Face Detection Solution to the user-defined ImageView
// instance that allows users to have the custom drawing of the output landmarks
// on it. See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultImageView.java
// as an example.
FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
faceDetection.setResultListener(
faceDetectionResult -> {
int width = faceDetectionResult.inputBitmap().getWidth();
int height = faceDetectionResult.inputBitmap().getHeight();
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(faceDetectionResult, 0, FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip coordinates (pixel values): x=%f, y=%f",
noseTip.getX() * width, noseTip.getY() * height));
// Request canvas drawing.
imageView.setFaceDetectionResult(faceDetectionResult);
runOnUiThread(() -> imageView.update());
});
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null && result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
// Please also rotate the Bitmap based on its orientation.
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
faceDetection.send(bitmap);
}
}
});
Intent gallery = new Intent(
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
```
#### Video Input
```java
// For video input and result rendering with OpenGL.
FaceDetectionOptions faceDetectionOptions =
FaceDetectionOptions.builder()
.setStaticImageMode(false)
.setModelSelection(0).build();
FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// Initializes a new VideoInput instance and connects it to MediaPipe Face Detection Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> faceDetection.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceDetectionResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
// See mediapipe/examples/android/solutions/facedetection/src/main/java/com/google/mediapipe/examples/facedetection/FaceDetectionResultGlRenderer.java
// as an example.
SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(faceDetectionResult, 0, FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseTip.getX(), noseTip.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceDetectionResult);
glSurfaceView.requestRender();
});
ActivityResultLauncher<Intent> videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
faceDetection.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
```
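For comparison with the Java listings above, here is a rough Python sketch that reads the same nose-tip keypoint through the Python Solution API; `get_key_point` and `FaceKeyPoint` are assumed to be available in `mp.solutions.face_detection` (verify against your installed MediaPipe release), and the input path is hypothetical:

```python
import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection

image = cv2.imread('/tmp/face.jpg')  # hypothetical input path
with mp_face_detection.FaceDetection(
    model_selection=0, min_detection_confidence=0.5) as face_detection:
  results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

if results.detections:
  # Normalized [0, 1] coordinates of the nose tip of the first detected face.
  nose_tip = mp_face_detection.get_key_point(
      results.detections[0], mp_face_detection.FaceKeyPoint.NOSE_TIP)
  print(f'Nose tip: x={nose_tip.x:.3f}, y={nose_tip.y:.3f}')
```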
## Example Apps
Please first see general instructions for Please first see general instructions for


@@ -111,6 +111,23 @@ You can find more information about the face landmark model in this
:------------------------------------------------------------------------: |
*Fig 2. Face landmarks: the red box indicates the cropped area as input to the landmark model, the red dots represent the 468 landmarks in 3D, and the green lines connecting landmarks illustrate the contours around the eyes, eyebrows, lips and the entire face.* |
#### Attention Mesh Model
In addition to the [Face Landmark Model](#face-landmark-model), we provide
another model that applies
[attention](https://en.wikipedia.org/wiki/Attention_(machine_learning)) to
semantically meaningful face regions, and therefore predicts landmarks more
accurately around lips, eyes and irises, at the expense of more compute. It
enables applications like AR makeup and AR puppeteering.
The attention mesh model can be selected in the Solution APIs via the
[refine_landmarks](#refine_landmarks) option. You can also find more information
about the model in this [paper](https://arxiv.org/abs/2006.10962).
![attention_mesh_architecture.png](../images/attention_mesh_architecture.png) |
:---------------------------------------------------------------------------: |
*Fig 3. Attention Mesh: Overview of model architecture.* |
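A brief sketch of opting into the attention mesh variant through the Python Solution API; it assumes the `refine_landmarks` option described below and that the refined output carries 10 extra iris landmarks on top of the 468 mesh landmarks (478 in total), which is worth double-checking against the release notes:

```python
import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh

# refine_landmarks=True selects the attention-based landmark model described above.
with mp_face_mesh.FaceMesh(static_image_mode=True,
                           refine_landmarks=True,
                           min_detection_confidence=0.5) as face_mesh:
  image = cv2.imread('/tmp/face.jpg')  # hypothetical input path
  results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

if results.multi_face_landmarks:
  # With refinement enabled, iris landmarks are appended to the mesh landmarks.
  print(len(results.multi_face_landmarks[0].landmark))  # expected 478 instead of 468
```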
## Face Geometry Module
The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark
@@ -145,8 +162,8 @@ be set freely, however for better results it is advised to set them as close to
the *real physical camera parameters* as possible.
![face_geometry_metric_3d_space.gif](../images/face_geometry_metric_3d_space.gif) |
:----------------------------------------------------------------------------: |
-*Fig 3. A visualization of multiple key elements in the Metric 3D space.* |
+*Fig 4. A visualization of multiple key elements in the Metric 3D space.* |
#### Canonical Face Model
@@ -210,7 +227,7 @@ The effect renderer is implemented as a MediaPipe
| ![face_geometry_renderer.gif](../images/face_geometry_renderer.gif) |
| :---------------------------------------------------------------------: |
-| *Fig 4. An example of face effects rendered by the Face Geometry Effect Renderer.* |
+| *Fig 5. An example of face effects rendered by the Face Geometry Effect Renderer.* |
## Solution APIs
@@ -234,6 +251,12 @@ unrelated, images. Default to `false`.
Maximum number of faces to detect. Default to `1`.
#### refine_landmarks
Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises by applying the
[Attention Mesh Model](#attention-mesh-model). Default to `false`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the face detection model for the
@@ -271,6 +294,7 @@ Supported configuration options:
* [static_image_mode](#static_image_mode)
* [max_num_faces](#max_num_faces)
+* [refine_landmarks](#refine_landmarks)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
@@ -287,6 +311,7 @@ drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
with mp_face_mesh.FaceMesh(
static_image_mode=True,
max_num_faces=1,
+refine_landmarks=True,
min_detection_confidence=0.5) as face_mesh:
for idx, file in enumerate(IMAGE_FILES):
image = cv2.imread(file)
@@ -313,12 +338,21 @@ with mp_face_mesh.FaceMesh(
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
mp_drawing.draw_landmarks(
image=annotated_image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_IRISES,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_iris_connections_style())
cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
# For webcam input:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mp_face_mesh.FaceMesh(
max_num_faces=1,
refine_landmarks=True,
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as face_mesh:
while cap.isOpened():
@@ -328,12 +362,10 @@ with mp_face_mesh.FaceMesh(
# If loading a video, use 'break' instead of 'continue'.
continue
-# Flip the image horizontally for a later selfie-view display, and convert
-# the BGR image to RGB.
-image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = face_mesh.process(image)
# Draw the face mesh annotations on the image.
@@ -355,7 +387,15 @@ with mp_face_mesh.FaceMesh(
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
-cv2.imshow('MediaPipe FaceMesh', image)
+mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_IRISES,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_iris_connections_style())
# Flip the image horizontally for a selfie-view display.
cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
@@ -370,6 +410,7 @@ and the following usage example.
Supported configuration options:
* [maxNumFaces](#max_num_faces)
+* [refineLandmarks](#refine_landmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
@@ -410,8 +451,10 @@ function onResults(results) {
{color: '#C0C0C070', lineWidth: 1});
drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYE, {color: '#FF3030'});
drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_EYEBROW, {color: '#FF3030'});
+drawConnectors(canvasCtx, landmarks, FACEMESH_RIGHT_IRIS, {color: '#FF3030'});
drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYE, {color: '#30FF30'});
drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_EYEBROW, {color: '#30FF30'});
+drawConnectors(canvasCtx, landmarks, FACEMESH_LEFT_IRIS, {color: '#30FF30'});
drawConnectors(canvasCtx, landmarks, FACEMESH_FACE_OVAL, {color: '#E0E0E0'});
drawConnectors(canvasCtx, landmarks, FACEMESH_LIPS, {color: '#E0E0E0'});
}
@@ -424,6 +467,7 @@ const faceMesh = new FaceMesh({locateFile: (file) => {
}});
faceMesh.setOptions({
maxNumFaces: 1,
+refineLandmarks: true,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});
@@ -444,7 +488,7 @@ camera.start();
Please first follow general
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
-to add MediaPipe Gradle dependencies, then try the FaceMash solution API in the
+to add MediaPipe Gradle dependencies, then try the Face Mesh Solution API in the
companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/facemesh)
following
@@ -455,6 +499,7 @@ Supported configuration options:
* [staticImageMode](#static_image_mode)
* [maxNumFaces](#max_num_faces)
+* [refineLandmarks](#refine_landmarks)
* runOnGpu: Run the pipeline and the model inference on GPU or CPU.
#### Camera Input
@@ -463,17 +508,18 @@ Supported configuration options:
// For camera input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
-.setMode(FaceMeshOptions.STREAMING_MODE)  // API soon to become
-.setMaxNumFaces(1)  // setStaticImageMode(false)
+.setStaticImageMode(false)
+.setRefineLandmarks(true)
+.setMaxNumFaces(1)
.setRunOnGpu(true).build();
-FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
-facemesh.setErrorListener(
+faceMesh.setErrorListener(
-(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+(message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
-// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
+// Initializes a new CameraInput instance and connects it to MediaPipe Face Mesh Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
-textureFrame -> facemesh.send(textureFrame));
+textureFrame -> faceMesh.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
@@ -481,18 +527,18 @@ cameraInput.setNewFrameListener(
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
new SolutionGlSurfaceView<>(
-this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
+this, faceMesh.getGlContext(), faceMesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
-facemesh.setResultListener(
+faceMesh.setResultListener(
faceMeshResult -> {
NormalizedLandmark noseLandmark =
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
-"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+"MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceMeshResult);
@@ -504,7 +550,7 @@ glSurfaceView.post(
() ->
cameraInput.start(
this,
-facemesh.getGlContext(),
+faceMesh.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
@@ -516,17 +562,18 @@ glSurfaceView.post(
// For reading images from gallery and drawing the output in an ImageView.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
-.setMode(FaceMeshOptions.STATIC_IMAGE_MODE)  // API soon to become
-.setMaxNumFaces(1)  // setStaticImageMode(true)
+.setStaticImageMode(true)
+.setRefineLandmarks(true)
+.setMaxNumFaces(1)
.setRunOnGpu(true).build();
-FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
-// Connects MediaPipe FaceMesh to the user-defined ImageView instance that allows
-// users to have the custom drawing of the output landmarks on it.
+// Connects MediaPipe Face Mesh Solution to the user-defined ImageView instance
+// that allows users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/facemesh/src/main/java/com/google/mediapipe/examples/facemesh/FaceMeshResultImageView.java
// as an example.
FaceMeshResultImageView imageView = new FaceMeshResultImageView(this);
-facemesh.setResultListener(
+faceMesh.setResultListener(
faceMeshResult -> {
int width = faceMeshResult.inputBitmap().getWidth();
int height = faceMeshResult.inputBitmap().getHeight();
@@ -535,14 +582,14 @@ facemesh.setResultListener(
Log.i(
TAG,
String.format(
-"MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f",
+"MediaPipe Face Mesh nose coordinates (pixel values): x=%f, y=%f",
noseLandmark.getX() * width, noseLandmark.getY() * height));
// Request canvas drawing.
imageView.setFaceMeshResult(faceMeshResult);
runOnUiThread(() -> imageView.update());
});
-facemesh.setErrorListener(
+faceMesh.setErrorListener(
-(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+(message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
// ActivityResultLauncher to get an image from the gallery as Bitmap.
ActivityResultLauncher<Intent> imageGetter =
@@ -556,11 +603,12 @@ ActivityResultLauncher<Intent> imageGetter =
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
+// Please also rotate the Bitmap based on its orientation.
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
if (bitmap != null) {
-facemesh.send(bitmap);
+faceMesh.send(bitmap);
}
}
});
@@ -575,17 +623,18 @@ imageGetter.launch(gallery);
// For video input and result rendering with OpenGL.
FaceMeshOptions faceMeshOptions =
FaceMeshOptions.builder()
-.setMode(FaceMeshOptions.STREAMING_MODE)  // API soon to become
-.setMaxNumFaces(1)  // setStaticImageMode(false)
+.setStaticImageMode(false)
+.setRefineLandmarks(true)
+.setMaxNumFaces(1)
.setRunOnGpu(true).build();
-FaceMesh facemesh = new FaceMesh(this, faceMeshOptions);
+FaceMesh faceMesh = new FaceMesh(this, faceMeshOptions);
-facemesh.setErrorListener(
+faceMesh.setErrorListener(
-(message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message));
+(message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
-// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
+// Initializes a new VideoInput instance and connects it to MediaPipe Face Mesh Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
-textureFrame -> facemesh.send(textureFrame));
+textureFrame -> faceMesh.send(textureFrame));
// Initializes a new GlSurfaceView with a ResultGlRenderer<FaceMeshResult> instance
// that provides the interfaces to run user-defined OpenGL rendering code.
@@ -593,18 +642,18 @@ videoInput.setNewFrameListener(
// as an example.
SolutionGlSurfaceView<FaceMeshResult> glSurfaceView =
new SolutionGlSurfaceView<>(
-this, facemesh.getGlContext(), facemesh.getGlMajorVersion());
+this, faceMesh.getGlContext(), faceMesh.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceMeshResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
-facemesh.setResultListener(
+faceMesh.setResultListener(
faceMeshResult -> {
NormalizedLandmark noseLandmark =
faceMeshResult.multiFaceLandmarks().get(0).getLandmarkList().get(1);
Log.i(
TAG,
String.format(
-"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
+"MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY()));
// Request GL rendering.
glSurfaceView.setRenderData(faceMeshResult);
@@ -623,7 +672,7 @@ ActivityResultLauncher<Intent> videoGetter =
videoInput.start(
this,
resultIntent.getData(),
-facemesh.getGlContext(),
+faceMesh.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}


@@ -269,12 +269,10 @@ with mp_hands.Hands(
# If loading a video, use 'break' instead of 'continue'.
continue
-# Flip the image horizontally for a later selfie-view display, and convert
-# the BGR image to RGB.
-image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = hands.process(image)
# Draw the hand annotations on the image.
@@ -288,7 +286,8 @@ with mp_hands.Hands(
mp_hands.HAND_CONNECTIONS,
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())
-cv2.imshow('MediaPipe Hands', image)
+# Flip the image horizontally for a selfie-view display.
+cv2.imshow('MediaPipe Hands', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
@@ -372,7 +371,7 @@ camera.start();
Please first follow general
[instructions](../getting_started/android_solutions.md#integrate-mediapipe-android-solutions-api)
-to add MediaPipe Gradle dependencies, then try the Hands solution API in the
+to add MediaPipe Gradle dependencies, then try the Hands Solution API in the
companion
[example Android Studio project](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/solutions/hands)
following
@@ -391,14 +390,14 @@ Supported configuration options:
// For camera input and result rendering with OpenGL.
HandsOptions handsOptions =
HandsOptions.builder()
-.setMode(HandsOptions.STREAMING_MODE)  // API soon to become
-.setMaxNumHands(1)  // setStaticImageMode(false)
+.setStaticImageMode(false)
+.setMaxNumHands(1)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
-// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
+// Initializes a new CameraInput instance and connects it to MediaPipe Hands Solution.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(
textureFrame -> hands.send(textureFrame));
@@ -444,13 +443,13 @@ glSurfaceView.post(
// For reading images from gallery and drawing the output in an ImageView.
HandsOptions handsOptions =
HandsOptions.builder()
-.setMode(HandsOptions.STATIC_IMAGE_MODE)  // API soon to become
-.setMaxNumHands(1)  // setStaticImageMode(true)
+.setStaticImageMode(true)
+.setMaxNumHands(1)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
-// Connects MediaPipe Hands to the user-defined ImageView instance that allows
-// users to have the custom drawing of the output landmarks on it.
+// Connects MediaPipe Hands Solution to the user-defined ImageView instance that
+// allows users to have the custom drawing of the output landmarks on it.
// See mediapipe/examples/android/solutions/hands/src/main/java/com/google/mediapipe/examples/hands/HandsResultImageView.java
// as an example.
HandsResultImageView imageView = new HandsResultImageView(this);
@@ -484,6 +483,7 @@ ActivityResultLauncher<Intent> imageGetter =
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
+// Please also rotate the Bitmap based on its orientation.
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
@@ -503,14 +503,14 @@ imageGetter.launch(gallery);
// For video input and result rendering with OpenGL.
HandsOptions handsOptions =
HandsOptions.builder()
-.setMode(HandsOptions.STREAMING_MODE)  // API soon to become
-.setMaxNumHands(1)  // setStaticImageMode(false)
+.setStaticImageMode(false)
+.setMaxNumHands(1)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
-// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
+// Initializes a new VideoInput instance and connects it to MediaPipe Hands Solution.
VideoInput videoInput = new VideoInput(this);
videoInput.setNewFrameListener(
textureFrame -> hands.send(textureFrame));


@@ -147,6 +147,18 @@ If set to `true`, the solution filters pose landmarks across different input
images to reduce jitter, but ignored if [static_image_mode](#static_image_mode)
is also set to `true`. Default to `true`.
#### enable_segmentation
If set to `true`, in addition to the pose, face and hand landmarks, the solution
also generates the segmentation mask. Default to `false`.
#### smooth_segmentation
If set to `true`, the solution filters segmentation masks across different input
images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@@ -207,6 +219,15 @@ the camera. The magnitude of `z` uses roughly the same scale as `x`.
A list of 21 hand landmarks on the right hand, in the same representation as
[left_hand_landmarks](#left_hand_landmarks).
#### segmentation_mask
The output segmentation mask, predicted only when
[enable_segmentation](#enable_segmentation) is set to `true`. The mask has the
same width and height as the input image, and contains values in `[0.0, 1.0]`
where `1.0` and `0.0` indicate high certainty of a "human" and "background"
pixel respectively. Please refer to the platform-specific usage examples below
for usage details.
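To make the description above concrete, a small sketch (assuming the Python Solution API together with NumPy) that thresholds the mask into a person/background selection:

```python
import cv2
import mediapipe as mp
import numpy as np

mp_holistic = mp.solutions.holistic

image = cv2.imread('/tmp/person.jpg')  # hypothetical input path
with mp_holistic.Holistic(static_image_mode=True,
                          enable_segmentation=True) as holistic:
  results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

mask = results.segmentation_mask  # None if nothing was segmented
if mask is not None:
  # Same height/width as the input image, values in [0.0, 1.0].
  assert mask.shape[:2] == image.shape[:2]
  # A simple 0.5 threshold keeps likely "human" pixels and zeroes the rest.
  person_only = np.where(mask[..., None] > 0.5, image, np.zeros_like(image))
  cv2.imwrite('/tmp/person_only.png', person_only)
```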
### Python Solution API
Please first follow general [instructions](../getting_started/python.md) to
@@ -218,6 +239,8 @@ Supported configuration options:
* [static_image_mode](#static_image_mode)
* [model_complexity](#model_complexity)
* [smooth_landmarks](#smooth_landmarks)
+* [enable_segmentation](#enable_segmentation)
+* [smooth_segmentation](#smooth_segmentation)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
@@ -232,7 +255,8 @@ mp_holistic = mp.solutions.holistic
IMAGE_FILES = []
with mp_holistic.Holistic(
static_image_mode=True,
-model_complexity=2) as holistic:
+model_complexity=2,
+enable_segmentation=True) as holistic:
for idx, file in enumerate(IMAGE_FILES):
image = cv2.imread(file)
image_height, image_width, _ = image.shape
@@ -245,8 +269,16 @@ with mp_holistic.Holistic(
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].x * image_width}, '
f'{results.pose_landmarks.landmark[mp_holistic.PoseLandmark.NOSE].y * image_height})'
)
-# Draw pose, left and right hands, and face landmarks on the image.
annotated_image = image.copy()
# Draw segmentation on the image.
# To improve segmentation around boundaries, consider applying a joint
# bilateral filter to "results.segmentation_mask" with "image".
condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
bg_image = np.zeros(image.shape, dtype=np.uint8)
bg_image[:] = BG_COLOR
annotated_image = np.where(condition, annotated_image, bg_image)
# Draw pose, left and right hands, and face landmarks on the image.
mp_drawing.draw_landmarks(
annotated_image,
results.face_landmarks,
@@ -277,12 +309,10 @@ with mp_holistic.Holistic(
# If loading a video, use 'break' instead of 'continue'.
continue
-# Flip the image horizontally for a later selfie-view display, and convert
-# the BGR image to RGB.
-image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = holistic.process(image)
# Draw landmark annotation on the image.
@@ -301,7 +331,8 @@ with mp_holistic.Holistic(
mp_holistic.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles
.get_default_pose_landmarks_style())
-cv2.imshow('MediaPipe Holistic', image)
+# Flip the image horizontally for a selfie-view display.
+cv2.imshow('MediaPipe Holistic', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
@@ -317,6 +348,8 @@ Supported configuration options:
* [modelComplexity](#model_complexity)
* [smoothLandmarks](#smooth_landmarks)
+* [enableSegmentation](#enable_segmentation)
+* [smoothSegmentation](#smooth_segmentation)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
@@ -349,8 +382,20 @@ const canvasCtx = canvasElement.getContext('2d');
function onResults(results) {
canvasCtx.save();
canvasCtx.clearRect(0, 0, canvasElement.width, canvasElement.height);
canvasCtx.drawImage(results.segmentationMask, 0, 0,
canvasElement.width, canvasElement.height);
// Only overwrite existing pixels.
canvasCtx.globalCompositeOperation = 'source-in';
canvasCtx.fillStyle = '#00FF00';
canvasCtx.fillRect(0, 0, canvasElement.width, canvasElement.height);
// Only overwrite missing pixels.
canvasCtx.globalCompositeOperation = 'destination-atop';
canvasCtx.drawImage(
results.image, 0, 0, canvasElement.width, canvasElement.height);
canvasCtx.globalCompositeOperation = 'source-over';
drawConnectors(canvasCtx, results.poseLandmarks, POSE_CONNECTIONS,
{color: '#00FF00', lineWidth: 4});
drawLandmarks(canvasCtx, results.poseLandmarks,
@@ -374,6 +419,8 @@ const holistic = new Holistic({locateFile: (file) => {
holistic.setOptions({
modelComplexity: 1,
smoothLandmarks: true,
+enableSegmentation: true,
+smoothSegmentation: true,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});


@@ -41,7 +41,10 @@ one over the other.
* Face landmark model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark.tflite),
[TF.js model](https://tfhub.dev/mediapipe/facemesh/1)
-* [Model card](https://mediapipe.page.link/facemesh-mc)
+* Face landmark model w/ attention (aka Attention Mesh):
+[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_with_attention.tflite)
+* [Model card](https://mediapipe.page.link/facemesh-mc),
+[Model card (w/ attention)](https://mediapipe.page.link/attentionmesh-mc)
### [Iris](https://google.github.io/mediapipe/solutions/iris)


@@ -338,11 +338,10 @@ with mp_objectron.Objectron(static_image_mode=False,
# If loading a video, use 'break' instead of 'continue'.
continue
-# Convert the BGR image to RGB.
-image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = objectron.process(image)
# Draw the box landmarks on the image.
@@ -354,7 +353,8 @@ with mp_objectron.Objectron(static_image_mode=False,
image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
mp_drawing.draw_axis(image, detected_object.rotation,
detected_object.translation)
-cv2.imshow('MediaPipe Objectron', image)
+# Flip the image horizontally for a selfie-view display.
+cv2.imshow('MediaPipe Objectron', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()


@@ -316,12 +316,10 @@ with mp_pose.Pose(
# If loading a video, use 'break' instead of 'continue'.
continue
-# Flip the image horizontally for a later selfie-view display, and convert
-# the BGR image to RGB.
-image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = pose.process(image)
# Draw the pose annotation on the image.
@@ -332,7 +330,8 @@ with mp_pose.Pose(
results.pose_landmarks,
mp_pose.POSE_CONNECTIONS,
landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
-cv2.imshow('MediaPipe Pose', image)
+# Flip the image horizontally for a selfie-view display.
+cv2.imshow('MediaPipe Pose', cv2.flip(image, 1))
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()


@@ -30,7 +30,7 @@ message FlowLimiterCalculatorOptions {
optional int32 max_in_flight = 1 [default = 1];
// The maximum number of frames queued waiting for processing.
-// The default value limits to 1 frame awaiting processing.
+// The default value limits to 0 frames awaiting processing.
optional int32 max_in_queue = 2 [default = 0];
// The maximum time in microseconds to wait for a frame to finish processing.


@@ -80,4 +80,7 @@ typedef SplitVectorCalculator<mediapipe::ClassificationList, false>
SplitClassificationListVectorCalculator;
REGISTER_CALCULATOR(SplitClassificationListVectorCalculator);
+typedef SplitVectorCalculator<uint64_t, false> SplitUint64tVectorCalculator;
+REGISTER_CALCULATOR(SplitUint64tVectorCalculator);
}  // namespace mediapipe


@@ -480,8 +480,7 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc,
if (cc->Inputs().HasTag(kRectTag)) {
const auto& rect = cc->Inputs().Tag(kRectTag).Get<Rect>();
// Only use the rect if it is valid.
-if (rect.width() > 0 && rect.height() > 0 && rect.x_center() >= 0 &&
-rect.y_center() >= 0) {
+if (rect.width() > 0 && rect.height() > 0) {
x_center = rect.x_center();
y_center = rect.y_center();
crop_width = rect.width();


@@ -337,12 +337,15 @@ absl::Status ImageTransformationCalculator::Process(CalculatorContext* cc) {
!cc->Inputs().Tag("FLIP_VERTICALLY").IsEmpty()) {
flip_vertically_ = cc->Inputs().Tag("FLIP_VERTICALLY").Get<bool>();
}
-if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS") &&
-!cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
-const auto& image_size =
-cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<std::pair<int, int>>();
-output_width_ = image_size.first;
-output_height_ = image_size.second;
+if (cc->Inputs().HasTag("OUTPUT_DIMENSIONS")) {
+if (cc->Inputs().Tag("OUTPUT_DIMENSIONS").IsEmpty()) {
+return absl::OkStatus();
+} else {
+const auto& image_size =
+cc->Inputs().Tag("OUTPUT_DIMENSIONS").Get<std::pair<int, int>>();
+output_width_ = image_size.first;
+output_height_ = image_size.second;
+}
}
if (use_gpu_) {
@@ -506,6 +509,14 @@ absl::Status ImageTransformationCalculator::RenderGpu(CalculatorContext* cc) {
ComputeOutputDimensions(input_width, input_height, &output_width,
&output_height);
if (scale_mode_ == mediapipe::ScaleMode_Mode_FILL_AND_CROP) {
const float scale =
std::min(static_cast<float>(output_width_) / input_width,
static_cast<float>(output_height_) / input_height);
output_width = std::round(input_width * scale);
output_height = std::round(input_height * scale);
}
if (cc->Outputs().HasTag("LETTERBOX_PADDING")) {
auto padding = absl::make_unique<std::array<float, 4>>();
ComputeOutputLetterboxPadding(input_width, input_height, output_width,
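The arithmetic introduced for `FILL_AND_CROP` above is easy to check by hand; here is a sketch mirroring it with hypothetical numbers (a 1280x720 input frame and a requested 256x256 output):

```python
def fill_and_crop_render_size(input_w, input_h, requested_w, requested_h):
  """Mirrors the FILL_AND_CROP branch added to RenderGpu above."""
  scale = min(requested_w / input_w, requested_h / input_h)
  return round(input_w * scale), round(input_h * scale)

# scale = min(256/1280, 256/720) = 0.2, so the frame is rendered at 256x144.
print(fill_and_crop_render_size(1280, 720, 256, 256))  # -> (256, 144)
```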


@@ -53,7 +53,7 @@ enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
// The alpha channel can be set to a single value, or come from an image mask.
// If the input image has an alpha channel, it will be updated.
// If the input image doesn't have an alpha channel, one will be added.
-// Adding alpha channel to a Grayscale (single channel) input is not suported.
+// Adding alpha channel to a Grayscale (single channel) input is not supported.
//
// Inputs:
// One of the following two IMAGE tags:

View File

@ -1384,6 +1384,32 @@ cc_library(
alwayslink = 1, alwayslink = 1,
) )
mediapipe_proto_library(
name = "landmarks_refinement_calculator_proto",
srcs = ["landmarks_refinement_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "landmarks_refinement_calculator",
srcs = ["landmarks_refinement_calculator.cc"],
hdrs = ["landmarks_refinement_calculator.h"],
deps = [
":landmarks_refinement_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:core_proto",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)
cc_test( cc_test(
name = "refine_landmarks_from_heatmap_calculator_test", name = "refine_landmarks_from_heatmap_calculator_test",
srcs = ["refine_landmarks_from_heatmap_calculator_test.cc"], srcs = ["refine_landmarks_from_heatmap_calculator_test.cc"],

View File

@ -0,0 +1,197 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/util/landmarks_refinement_calculator.h"
#include <algorithm>
#include <set>
#include <utility>
#include "absl/memory/memory.h"
#include "mediapipe/calculators/util/landmarks_refinement_calculator.pb.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/ret_check.h"
namespace mediapipe {
namespace api2 {
namespace {
absl::StatusOr<int> GetNumberOfRefinedLandmarks(
const proto_ns::RepeatedPtrField<
LandmarksRefinementCalculatorOptions::Refinement>& refinements) {
// Gather all used indexes.
std::set<int> idxs;
for (int i = 0; i < refinements.size(); ++i) {
const auto& refinement = refinements.Get(i);
    for (int j = 0; j < refinement.indexes_mapping_size(); ++j) {
      idxs.insert(refinement.indexes_mapping(j));
}
}
// Check that indexes start with 0 and there are no gaps between the min and
// max indexes.
RET_CHECK(!idxs.empty())
<< "There should be at least one landmark in indexes mapping";
int idxs_min = *idxs.begin();
int idxs_max = *idxs.rbegin();
int n_idxs = idxs.size();
RET_CHECK_EQ(idxs_min, 0)
<< "Indexes are expected to start with 0 instead of " << idxs_min;
RET_CHECK_EQ(idxs_max, n_idxs - 1)
<< "Indexes should have no gaps but " << idxs_max - n_idxs + 1
<< " indexes are missing";
return n_idxs;
}
void RefineXY(const proto_ns::RepeatedField<int>& indexes_mapping,
const NormalizedLandmarkList& landmarks,
NormalizedLandmarkList* refined_landmarks) {
for (int i = 0; i < landmarks.landmark_size(); ++i) {
const auto& landmark = landmarks.landmark(i);
auto* refined_landmark =
refined_landmarks->mutable_landmark(indexes_mapping.Get(i));
refined_landmark->set_x(landmark.x());
refined_landmark->set_y(landmark.y());
}
}
float GetZAverage(const NormalizedLandmarkList& landmarks,
const proto_ns::RepeatedField<int>& indexes) {
double z_sum = 0;
for (int i = 0; i < indexes.size(); ++i) {
z_sum += landmarks.landmark(indexes.Get(i)).z();
}
return z_sum / indexes.size();
}
void RefineZ(
const proto_ns::RepeatedField<int>& indexes_mapping,
const LandmarksRefinementCalculatorOptions::ZRefinement& z_refinement,
const NormalizedLandmarkList& landmarks,
NormalizedLandmarkList* refined_landmarks) {
if (z_refinement.has_none()) {
// Do nothing and keep Z that is already in refined landmarks.
} else if (z_refinement.has_copy()) {
for (int i = 0; i < landmarks.landmark_size(); ++i) {
refined_landmarks->mutable_landmark(indexes_mapping.Get(i))
->set_z(landmarks.landmark(i).z());
}
} else if (z_refinement.has_assign_average()) {
const float z_average =
GetZAverage(*refined_landmarks,
z_refinement.assign_average().indexes_for_average());
for (int i = 0; i < indexes_mapping.size(); ++i) {
refined_landmarks->mutable_landmark(indexes_mapping.Get(i))
->set_z(z_average);
}
} else {
CHECK(false) << "Z refinement is either not specified or not supported";
}
}
} // namespace
class LandmarksRefinementCalculatorImpl
: public NodeImpl<LandmarksRefinementCalculator> {
absl::Status Open(CalculatorContext* cc) override {
options_ = cc->Options<LandmarksRefinementCalculatorOptions>();
// Validate refinements.
for (int i = 0; i < options_.refinement_size(); ++i) {
const auto& refinement = options_.refinement(i);
RET_CHECK_GT(refinement.indexes_mapping_size(), 0)
<< "Refinement " << i << " has no indexes mapping";
RET_CHECK(refinement.has_z_refinement())
<< "Refinement " << i << " has no Z refinement specified";
RET_CHECK(refinement.z_refinement().has_none() ^
refinement.z_refinement().has_copy() ^
refinement.z_refinement().has_assign_average())
<< "Exactly one Z refinement should be specified";
const auto z_refinement = refinement.z_refinement();
if (z_refinement.has_assign_average()) {
RET_CHECK_GT(z_refinement.assign_average().indexes_for_average_size(),
0)
<< "When using assign average Z refinement at least one index for "
"averagin should be specified";
}
}
// Validate indexes mapping and get total number of refined landmarks.
ASSIGN_OR_RETURN(n_refined_landmarks_,
GetNumberOfRefinedLandmarks(options_.refinement()));
// Validate that the numbers of refinements and landmark streams match.
RET_CHECK_EQ(kLandmarks(cc).Count(), options_.refinement_size())
<< "There are " << options_.refinement_size() << " refinements while "
<< kLandmarks(cc).Count() << " landmark streams";
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
// If any of the input landmark streams is empty, no refinement happens.
for (const auto& landmarks_stream : kLandmarks(cc)) {
if (landmarks_stream.IsEmpty()) {
return absl::OkStatus();
}
}
// Initialize refined landmarks list.
auto refined_landmarks = absl::make_unique<NormalizedLandmarkList>();
for (int i = 0; i < n_refined_landmarks_; ++i) {
refined_landmarks->add_landmark();
}
// Apply input landmarks to output refined landmarks in the provided order.
for (int i = 0; i < kLandmarks(cc).Count(); ++i) {
const auto& landmarks = kLandmarks(cc)[i].Get();
const auto& refinement = options_.refinement(i);
// Check that the number of landmarks in the mapping and the stream match.
RET_CHECK_EQ(landmarks.landmark_size(), refinement.indexes_mapping_size())
<< "There are " << landmarks.landmark_size()
<< " refinement landmarks while mapping has "
<< refinement.indexes_mapping_size();
// Refine X and Y.
RefineXY(refinement.indexes_mapping(), landmarks,
refined_landmarks.get());
// Refine Z.
RefineZ(refinement.indexes_mapping(), refinement.z_refinement(),
landmarks, refined_landmarks.get());
// Visibility and presence are not currently refined and are left as `0`.
}
kRefinedLandmarks(cc).Send(std::move(refined_landmarks));
return absl::OkStatus();
}
private:
LandmarksRefinementCalculatorOptions options_;
int n_refined_landmarks_ = 0;
};
MEDIAPIPE_NODE_IMPLEMENTATION(LandmarksRefinementCalculatorImpl);
} // namespace api2
} // namespace mediapipe
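
The index-mapping validation above requires the union of all indexes_mapping fields to start at 0 and to contain no gaps. A self-contained sketch of the same rule over plain vectors, with illustrative names (it does not call the calculator's internals):

```cpp
#include <set>
#include <vector>

// Returns the number of refined landmarks if the union of all mappings is
// exactly {0, 1, ..., N-1}; returns -1 otherwise.
int RefinedLandmarkCount(const std::vector<std::vector<int>>& mappings) {
  std::set<int> idxs;
  for (const auto& mapping : mappings) {
    idxs.insert(mapping.begin(), mapping.end());
  }
  if (idxs.empty() || *idxs.begin() != 0) return -1;
  const int n = static_cast<int>(idxs.size());
  if (*idxs.rbegin() != n - 1) return -1;  // Gap between min and max index.
  return n;
}

// {0..9}, {0..3}, {4, 5}, {6, 7} together cover 0..9 with no gaps -> 10.
// {0..9} and {11, 12} leave index 10 unmapped -> -1.
```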

View File

@ -0,0 +1,85 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_REFINEMENT_CALCULATOR_H_
#define MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_REFINEMENT_CALCULATOR_H_
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/formats/landmark.pb.h"
namespace mediapipe {
namespace api2 {
// A calculator to refine one set of landmarks with another.
//
// Inputs:
// LANDMARKS: Multiple NormalizedLandmarkList to use for
// refinement. They will be applied to the resulting REFINED_LANDMARKS in
// the provided order. Each list should be non-empty and contain the same
// number of landmarks as indexes in the mapping. The number of lists should
// be the same as the number of refinements in the options.
//
// Outputs:
// REFINED_LANDMARKS: A NormalizedLandmarkList with refined landmarks. The
// number of produced landmarks is equal to the maximum index in the mapping
// plus one (the calculator verifies that there are no gaps in the mapping).
//
// Example config:
// node {
// calculator: "LandmarksRefinementCalculator"
// input_stream: "LANDMARKS:0:mesh_landmarks"
// input_stream: "LANDMARKS:1:lips_landmarks"
// input_stream: "LANDMARKS:2:left_eye_landmarks"
// input_stream: "LANDMARKS:3:right_eye_landmarks"
// output_stream: "REFINED_LANDMARKS:landmarks"
// options: {
// [mediapipe.LandmarksRefinementCalculatorOptions.ext] {
// refinement: {
// indexes_mapping: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
// z_refinement: { copy {} }
// }
// refinement: {
// indexes_mapping: [0, 1, 2, 3]
// z_refinement: { none {} }
// }
// refinement: {
// indexes_mapping: [4, 5]
// z_refinement: { none {} }
// }
// refinement: {
// indexes_mapping: [6, 7]
// z_refinement: { none {} }
// }
// }
// }
// }
//
class LandmarksRefinementCalculator : public NodeIntf {
public:
static constexpr Input<::mediapipe::NormalizedLandmarkList>::Multiple
kLandmarks{"LANDMARKS"};
static constexpr Output<::mediapipe::NormalizedLandmarkList>
kRefinedLandmarks{"REFINED_LANDMARKS"};
MEDIAPIPE_NODE_INTERFACE(LandmarksRefinementCalculator, kLandmarks,
kRefinedLandmarks);
};
} // namespace api2
} // namespace mediapipe
#endif // MEDIAPIPE_CALCULATORS_UTIL_LANDMARKS_REFINEMENT_CALCULATOR_H_

View File

@ -0,0 +1,71 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message LandmarksRefinementCalculatorOptions {
extend CalculatorOptions {
optional LandmarksRefinementCalculatorOptions ext = 381914658;
}
// Do nothing and keep those Z that are already present in the resulting set
// of landmarks.
message ZRefinementNone {}
// Simply copy Z values from the given set of landmarks to the resulting set
// of landmarks.
message ZRefinementCopy {}
// Calculate the average Z of the specified landmarks in the resulting set
// and use it as the Z value for all given landmarks when assigning their
// values to the resulting set of landmarks.
message ZRefinementAssignAverage {
// Indexes of the resulting landmarks to use for the average. Should be
// non-empty.
repeated int32 indexes_for_average = 1;
}
// Specifies the set of instructions on assigning z value from the given set
// of landmarks to the resulting set of landmarks.
message ZRefinement {
// Exactly one Z refinement option should be specified.
oneof z_refinement_options {
ZRefinementNone none = 1;
ZRefinementCopy copy = 2;
ZRefinementAssignAverage assign_average = 3;
}
}
// Specifies the set of instructions of assigning values to the resulting set
// of landmarks.
message Refinement {
// Maps indexes of the given set of landmarks to indexes of the resulting
// set of landmarks. Should be non-empty and contain the same number of
// indexes as there are landmarks in the corresponding input stream.
repeated int32 indexes_mapping = 1;
// Z refinement instructions.
optional ZRefinement z_refinement = 2;
}
// Refinement instructions for every landmark input stream. Applied in the
// same order as defined. There should be as many refinements as landmark
// input streams in the calculator. The union of the index mappings should
// start with 0 and cover a continuous range.
repeated Refinement refinement = 1;
}
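
The example in the calculator header only exercises the copy and none Z refinements; here is a short sketch of building these options with assign_average in C++, assuming the standard protobuf-generated accessors for the fields declared above (add_refinement, mutable_z_refinement, and so on):

```cpp
#include "mediapipe/calculators/util/landmarks_refinement_calculator.pb.h"

mediapipe::LandmarksRefinementCalculatorOptions MakeOptions() {
  mediapipe::LandmarksRefinementCalculatorOptions options;

  // First landmark stream: indexes 0..9, Z copied from the stream as-is.
  auto* mesh = options.add_refinement();
  for (int i = 0; i < 10; ++i) mesh->add_indexes_mapping(i);
  mesh->mutable_z_refinement()->mutable_copy();

  // Second landmark stream: two landmarks whose Z is set to the average Z of
  // the already-refined landmarks 0 and 1. The union of both mappings still
  // covers 0..9 without gaps, as the calculator requires.
  auto* extra = options.add_refinement();
  extra->add_indexes_mapping(4);
  extra->add_indexes_mapping(5);
  auto* average = extra->mutable_z_refinement()->mutable_assign_average();
  average->add_indexes_for_average(0);
  average->add_indexes_for_average(1);
  return options;
}
```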

View File

@ -86,11 +86,11 @@ inline void GetMinMaxZ(const LandmarkListType& landmarks, float* z_min,
} }
template <class LandmarkType> template <class LandmarkType>
bool IsLandmarkVisibileAndPresent(const LandmarkType& landmark, bool IsLandmarkVisibleAndPresent(const LandmarkType& landmark,
bool utilize_visibility, bool utilize_visibility,
float visibility_threshold, float visibility_threshold,
bool utilize_presence, bool utilize_presence,
float presence_threshold) { float presence_threshold) {
if (utilize_visibility && landmark.has_visibility() && if (utilize_visibility && landmark.has_visibility() &&
landmark.visibility() < visibility_threshold) { landmark.visibility() < visibility_threshold) {
return false; return false;
@ -153,12 +153,16 @@ void AddConnectionsWithDepth(const LandmarkListType& landmarks,
const Color& max_depth_line_color, const Color& max_depth_line_color,
RenderData* render_data) { RenderData* render_data) {
for (int i = 0; i < landmark_connections.size(); i += 2) { for (int i = 0; i < landmark_connections.size(); i += 2) {
if (landmark_connections[i] >= landmarks.landmark_size() ||
landmark_connections[i + 1] >= landmarks.landmark_size()) {
continue;
}
const auto& ld0 = landmarks.landmark(landmark_connections[i]); const auto& ld0 = landmarks.landmark(landmark_connections[i]);
const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]); const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]);
if (!IsLandmarkVisibileAndPresent<LandmarkType>( if (!IsLandmarkVisibleAndPresent<LandmarkType>(
ld0, utilize_visibility, visibility_threshold, utilize_presence, ld0, utilize_visibility, visibility_threshold, utilize_presence,
presence_threshold) || presence_threshold) ||
!IsLandmarkVisibileAndPresent<LandmarkType>( !IsLandmarkVisibleAndPresent<LandmarkType>(
ld1, utilize_visibility, visibility_threshold, utilize_presence, ld1, utilize_visibility, visibility_threshold, utilize_presence,
presence_threshold)) { presence_threshold)) {
continue; continue;
@ -196,12 +200,16 @@ void AddConnections(const LandmarkListType& landmarks,
const Color& connection_color, float thickness, const Color& connection_color, float thickness,
bool normalized, RenderData* render_data) { bool normalized, RenderData* render_data) {
for (int i = 0; i < landmark_connections.size(); i += 2) { for (int i = 0; i < landmark_connections.size(); i += 2) {
if (landmark_connections[i] >= landmarks.landmark_size() ||
landmark_connections[i + 1] >= landmarks.landmark_size()) {
continue;
}
const auto& ld0 = landmarks.landmark(landmark_connections[i]); const auto& ld0 = landmarks.landmark(landmark_connections[i]);
const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]); const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]);
if (!IsLandmarkVisibileAndPresent<LandmarkType>( if (!IsLandmarkVisibleAndPresent<LandmarkType>(
ld0, utilize_visibility, visibility_threshold, utilize_presence, ld0, utilize_visibility, visibility_threshold, utilize_presence,
presence_threshold) || presence_threshold) ||
!IsLandmarkVisibileAndPresent<LandmarkType>( !IsLandmarkVisibleAndPresent<LandmarkType>(
ld1, utilize_visibility, visibility_threshold, utilize_presence, ld1, utilize_visibility, visibility_threshold, utilize_presence,
presence_threshold)) { presence_threshold)) {
continue; continue;
@ -317,7 +325,7 @@ absl::Status LandmarksToRenderDataCalculator::Process(CalculatorContext* cc) {
for (int i = 0; i < landmarks.landmark_size(); ++i) { for (int i = 0; i < landmarks.landmark_size(); ++i) {
const Landmark& landmark = landmarks.landmark(i); const Landmark& landmark = landmarks.landmark(i);
if (!IsLandmarkVisibileAndPresent<Landmark>( if (!IsLandmarkVisibleAndPresent<Landmark>(
landmark, options_.utilize_visibility(), landmark, options_.utilize_visibility(),
options_.visibility_threshold(), options_.utilize_presence(), options_.visibility_threshold(), options_.utilize_presence(),
options_.presence_threshold())) { options_.presence_threshold())) {
@ -363,7 +371,7 @@ absl::Status LandmarksToRenderDataCalculator::Process(CalculatorContext* cc) {
for (int i = 0; i < landmarks.landmark_size(); ++i) { for (int i = 0; i < landmarks.landmark_size(); ++i) {
const NormalizedLandmark& landmark = landmarks.landmark(i); const NormalizedLandmark& landmark = landmarks.landmark(i);
if (!IsLandmarkVisibileAndPresent<NormalizedLandmark>( if (!IsLandmarkVisibleAndPresent<NormalizedLandmark>(
landmark, options_.utilize_visibility(), landmark, options_.utilize_visibility(),
options_.visibility_threshold(), options_.utilize_presence(), options_.visibility_threshold(), options_.utilize_presence(),
options_.presence_threshold())) { options_.presence_threshold())) {
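
The guards added above skip connections whose endpoints fall outside the landmark list, for instance when a connection set that includes iris indices is paired with a face mesh list that has no iris landmarks. A standalone sketch of the same filtering, with illustrative names:

```cpp
#include <cstddef>
#include <utility>
#include <vector>

// Keeps only the connection endpoint pairs that reference existing landmarks,
// mirroring the bounds checks added above.
std::vector<std::pair<int, int>> ValidConnections(
    const std::vector<int>& landmark_connections, int num_landmarks) {
  std::vector<std::pair<int, int>> valid;
  for (std::size_t i = 0; i + 1 < landmark_connections.size(); i += 2) {
    const int a = landmark_connections[i];
    const int b = landmark_connections[i + 1];
    if (a >= num_landmarks || b >= num_landmarks) continue;
    valid.emplace_back(a, b);
  }
  return valid;
}
```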

View File

@ -36,7 +36,7 @@ inline float NormalizeRadians(float angle) {
} // namespace } // namespace
// Performs geometric transformation to the input Rect or NormalizedRect, // Performs geometric transformation to the input Rect or NormalizedRect,
// correpsonding to input stream RECT or NORM_RECT respectively. When the input // corresponding to input stream RECT or NORM_RECT respectively. When the input
// is NORM_RECT, an addition input stream IMAGE_SIZE is required, which is a // is NORM_RECT, an addition input stream IMAGE_SIZE is required, which is a
// std::pair<int, int> representing the image width and height. // std::pair<int, int> representing the image width and height.
// //

View File

@ -12,5 +12,12 @@ cd /d %~dp0
cd facemesh\src\main cd facemesh\src\main
rm res rm res
mklink /d res ..\..\..\res mklink /d res ..\..\..\res
@rem for face detection example app.
cd /d %~dp0
cd facedetection\src\main
rm res
mklink /d res ..\..\..\res
dir dir
pause pause

View File

@ -0,0 +1,51 @@
plugins {
id 'com.android.application'
}
android {
compileSdkVersion 30
buildToolsVersion "30.0.3"
defaultConfig {
applicationId "com.google.mediapipe.apps.facedetection"
minSdkVersion 21
targetSdkVersion 30
versionCode 1
versionName "1.0"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
}
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar'])
implementation 'androidx.appcompat:appcompat:1.3.0'
implementation 'com.google.android.material:material:1.3.0'
implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
implementation 'androidx.exifinterface:exifinterface:1.3.3'
testImplementation 'junit:junit:4.+'
androidTestImplementation 'androidx.test.ext:junit:1.1.2'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
// MediaPipe Face Detection Solution components.
implementation 'com.google.mediapipe:solution-core:latest.release'
implementation 'com.google.mediapipe:facedetection:latest.release'
// MediaPipe deps
implementation 'com.google.flogger:flogger:0.6'
implementation 'com.google.flogger:flogger-system-backend:0.6'
implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library
def camerax_version = "1.0.0-beta10"
implementation "androidx.camera:camera-core:$camerax_version"
implementation "androidx.camera:camera-camera2:$camerax_version"
implementation "androidx.camera:camera-lifecycle:$camerax_version"
}

View File

@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.examples.facedetection">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="30" />
<!-- For loading images from gallery -->
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<application
android:allowBackup="true"
android:icon="@mipmap/ic_launcher"
android:label="MediaPipe Face Detection"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity android:name=".MainActivity"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -0,0 +1,46 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:private"])
android_binary(
name = "facedetection",
srcs = glob(["**/*.java"]),
custom_package = "com.google.mediapipe.examples.facedetection",
manifest = "AndroidManifest.xml",
manifest_values = {
"applicationId": "com.google.mediapipe.examples.facedetection",
},
multidex = "native",
resource_files = ["//mediapipe/examples/android/solutions:resource_files"],
deps = [
"//mediapipe/framework/formats:detection_java_proto_lite",
"//mediapipe/framework/formats:location_data_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/solutioncore:camera_input",
"//mediapipe/java/com/google/mediapipe/solutioncore:mediapipe_jni_lib",
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_rendering",
"//mediapipe/java/com/google/mediapipe/solutioncore:video_input",
"//mediapipe/java/com/google/mediapipe/solutions/facedetection",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:opencv",
"@maven//:androidx_activity_activity",
"@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_exifinterface_exifinterface",
"@maven//:androidx_fragment_fragment",
"@maven//:com_google_guava_guava",
],
)

View File

@ -0,0 +1,146 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.examples.facedetection;
import android.opengl.GLES20;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.solutioncore.ResultGlRenderer;
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
/** A custom implementation of {@link ResultGlRenderer} to render {@link FaceDetectionResult}. */
public class FaceDetectionResultGlRenderer implements ResultGlRenderer<FaceDetectionResult> {
private static final String TAG = "FaceDetectionResultGlRenderer";
private static final float[] KEYPOINT_COLOR = new float[] {1f, 0f, 0f, 1f};
private static final float KEYPOINT_SIZE = 16f;
private static final float[] BBOX_COLOR = new float[] {0f, 1f, 0f, 1f};
private static final int BBOX_THICKNESS = 8;
private static final String VERTEX_SHADER =
"uniform mat4 uProjectionMatrix;\n"
+ "uniform float uPointSize;\n"
+ "attribute vec4 vPosition;\n"
+ "void main() {\n"
+ " gl_Position = uProjectionMatrix * vPosition;\n"
+ " gl_PointSize = uPointSize;"
+ "}";
private static final String FRAGMENT_SHADER =
"precision mediump float;\n"
+ "uniform vec4 uColor;\n"
+ "void main() {\n"
+ " gl_FragColor = uColor;\n"
+ "}";
private int program;
private int positionHandle;
private int pointSizeHandle;
private int projectionMatrixHandle;
private int colorHandle;
private int loadShader(int type, String shaderCode) {
int shader = GLES20.glCreateShader(type);
GLES20.glShaderSource(shader, shaderCode);
GLES20.glCompileShader(shader);
return shader;
}
@Override
public void setupRendering() {
program = GLES20.glCreateProgram();
int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER);
int fragmentShader = loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER);
GLES20.glAttachShader(program, vertexShader);
GLES20.glAttachShader(program, fragmentShader);
GLES20.glLinkProgram(program);
positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
pointSizeHandle = GLES20.glGetUniformLocation(program, "uPointSize");
projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
colorHandle = GLES20.glGetUniformLocation(program, "uColor");
}
@Override
public void renderResult(FaceDetectionResult result, float[] projectionMatrix) {
if (result == null) {
return;
}
GLES20.glUseProgram(program);
GLES20.glUniformMatrix4fv(projectionMatrixHandle, 1, false, projectionMatrix, 0);
GLES20.glUniform1f(pointSizeHandle, KEYPOINT_SIZE);
int numDetectedFaces = result.multiFaceDetections().size();
for (int i = 0; i < numDetectedFaces; ++i) {
drawDetection(result.multiFaceDetections().get(i));
}
}
/**
* Deletes the shader program.
*
* <p>This is only necessary if one wants to release the program while keeping the context around.
*/
public void release() {
GLES20.glDeleteProgram(program);
}
private void drawDetection(Detection detection) {
if (!detection.hasLocationData()) {
return;
}
// Draw keypoints.
float[] points = new float[FaceKeypoint.NUM_KEY_POINTS * 2];
for (int i = 0; i < FaceKeypoint.NUM_KEY_POINTS; ++i) {
points[2 * i] = detection.getLocationData().getRelativeKeypoints(i).getX();
points[2 * i + 1] = detection.getLocationData().getRelativeKeypoints(i).getY();
}
GLES20.glUniform4fv(colorHandle, 1, KEYPOINT_COLOR, 0);
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(points.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(points);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_POINTS, 0, FaceKeypoint.NUM_KEY_POINTS);
if (!detection.getLocationData().hasRelativeBoundingBox()) {
return;
}
// Draw bounding box.
float left = detection.getLocationData().getRelativeBoundingBox().getXmin();
float top = detection.getLocationData().getRelativeBoundingBox().getYmin();
float right = left + detection.getLocationData().getRelativeBoundingBox().getWidth();
float bottom = top + detection.getLocationData().getRelativeBoundingBox().getHeight();
drawLine(top, left, top, right);
drawLine(bottom, left, bottom, right);
drawLine(top, left, bottom, left);
drawLine(top, right, bottom, right);
}
private void drawLine(float y1, float x1, float y2, float x2) {
GLES20.glUniform4fv(colorHandle, 1, BBOX_COLOR, 0);
GLES20.glLineWidth(BBOX_THICKNESS);
float[] vertex = {x1, y1, x2, y2};
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertex.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(vertex);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
}
}

View File

@ -0,0 +1,108 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.examples.facedetection;
import static java.lang.Math.min;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Matrix;
import android.graphics.Paint;
import androidx.appcompat.widget.AppCompatImageView;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
/** An ImageView implementation for displaying {@link FaceDetectionResult}. */
public class FaceDetectionResultImageView extends AppCompatImageView {
private static final String TAG = "FaceDetectionResultImageView";
private static final int KEYPOINT_COLOR = Color.RED;
private static final int KEYPOINT_RADIUS = 15;
private static final int BBOX_COLOR = Color.GREEN;
private static final int BBOX_THICKNESS = 10;
private Bitmap latest;
public FaceDetectionResultImageView(Context context) {
super(context);
setScaleType(AppCompatImageView.ScaleType.FIT_CENTER);
}
/**
* Sets a {@link FaceDetectionResult} to render.
*
* @param result a {@link FaceDetectionResult} object that contains the solution outputs and the
* input {@link Bitmap}.
*/
public void setFaceDetectionResult(FaceDetectionResult result) {
if (result == null) {
return;
}
Bitmap bmInput = result.inputBitmap();
int width = bmInput.getWidth();
int height = bmInput.getHeight();
latest = Bitmap.createBitmap(width, height, bmInput.getConfig());
Canvas canvas = new Canvas(latest);
canvas.drawBitmap(bmInput, new Matrix(), null);
int numDetectedFaces = result.multiFaceDetections().size();
for (int i = 0; i < numDetectedFaces; ++i) {
drawDetectionOnCanvas(result.multiFaceDetections().get(i), canvas, width, height);
}
}
/** Updates the image view with the latest {@link FaceDetectionResult}. */
public void update() {
postInvalidate();
if (latest != null) {
setImageBitmap(latest);
}
}
private void drawDetectionOnCanvas(Detection detection, Canvas canvas, int width, int height) {
if (!detection.hasLocationData()) {
return;
}
// Draw keypoints.
Paint keypointPaint = new Paint();
keypointPaint.setColor(KEYPOINT_COLOR);
for (int i = 0; i < FaceKeypoint.NUM_KEY_POINTS; ++i) {
int xPixel =
min(
(int) (detection.getLocationData().getRelativeKeypoints(i).getX() * width),
width - 1);
int yPixel =
min(
(int) (detection.getLocationData().getRelativeKeypoints(i).getY() * height),
height - 1);
canvas.drawCircle(xPixel, yPixel, KEYPOINT_RADIUS, keypointPaint);
}
if (!detection.getLocationData().hasRelativeBoundingBox()) {
return;
}
// Draw bounding box.
Paint bboxPaint = new Paint();
bboxPaint.setColor(BBOX_COLOR);
bboxPaint.setStyle(Paint.Style.STROKE);
bboxPaint.setStrokeWidth(BBOX_THICKNESS);
float left = detection.getLocationData().getRelativeBoundingBox().getXmin() * width;
float top = detection.getLocationData().getRelativeBoundingBox().getYmin() * height;
float right = left + detection.getLocationData().getRelativeBoundingBox().getWidth() * width;
float bottom = top + detection.getLocationData().getRelativeBoundingBox().getHeight() * height;
canvas.drawRect(left, top, right, bottom, bboxPaint);
}
}

View File

@ -0,0 +1,341 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.examples.facedetection;
import android.content.Intent;
import android.graphics.Bitmap;
import android.graphics.Matrix;
import android.os.Bundle;
import android.provider.MediaStore;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.FrameLayout;
import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import androidx.exifinterface.media.ExifInterface;
// ContentResolver dependency
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
import com.google.mediapipe.solutioncore.VideoInput;
import com.google.mediapipe.solutions.facedetection.FaceDetection;
import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
import java.io.IOException;
import java.io.InputStream;
/** Main activity of MediaPipe Face Detection app. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private FaceDetection faceDetection;
private enum InputSource {
UNKNOWN,
IMAGE,
VIDEO,
CAMERA,
}
private InputSource inputSource = InputSource.UNKNOWN;
// Image demo UI and image loader components.
private ActivityResultLauncher<Intent> imageGetter;
private FaceDetectionResultImageView imageView;
// Video demo UI and video loader components.
private VideoInput videoInput;
private ActivityResultLauncher<Intent> videoGetter;
// Live camera demo UI and camera components.
private CameraInput cameraInput;
private SolutionGlSurfaceView<FaceDetectionResult> glSurfaceView;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
setupStaticImageDemoUiComponents();
setupVideoDemoUiComponents();
setupLiveDemoUiComponents();
}
@Override
protected void onResume() {
super.onResume();
if (inputSource == InputSource.CAMERA) {
// Restarts the camera and the opengl surface rendering.
cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> faceDetection.send(textureFrame));
glSurfaceView.post(this::startCamera);
glSurfaceView.setVisibility(View.VISIBLE);
} else if (inputSource == InputSource.VIDEO) {
videoInput.resume();
}
}
@Override
protected void onPause() {
super.onPause();
if (inputSource == InputSource.CAMERA) {
glSurfaceView.setVisibility(View.GONE);
cameraInput.close();
} else if (inputSource == InputSource.VIDEO) {
videoInput.pause();
}
}
/** Sets up the UI components for the static image demo. */
private void setupStaticImageDemoUiComponents() {
// The Intent to access gallery and read images as bitmap.
imageGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
Bitmap bitmap = null;
try {
bitmap =
MediaStore.Images.Media.getBitmap(
this.getContentResolver(), resultIntent.getData());
} catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e);
}
try {
InputStream imageData =
this.getContentResolver().openInputStream(resultIntent.getData());
int orientation =
new ExifInterface(imageData)
.getAttributeInt(
ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);
if (orientation != ExifInterface.ORIENTATION_NORMAL) {
Matrix matrix = new Matrix();
switch (orientation) {
case ExifInterface.ORIENTATION_ROTATE_90:
matrix.postRotate(90);
break;
case ExifInterface.ORIENTATION_ROTATE_180:
matrix.postRotate(180);
break;
case ExifInterface.ORIENTATION_ROTATE_270:
matrix.postRotate(270);
break;
default:
matrix.postRotate(0);
}
bitmap =
Bitmap.createBitmap(
bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
}
} catch (IOException e) {
Log.e(TAG, "Bitmap rotation error:" + e);
}
if (bitmap != null) {
faceDetection.send(bitmap);
}
}
}
});
Button loadImageButton = findViewById(R.id.button_load_picture);
loadImageButton.setOnClickListener(
v -> {
if (inputSource != InputSource.IMAGE) {
stopCurrentPipeline();
setupStaticImageModePipeline();
}
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
});
imageView = new FaceDetectionResultImageView(this);
}
/** Sets up core workflow for static image mode. */
private void setupStaticImageModePipeline() {
this.inputSource = InputSource.IMAGE;
// Initializes a new MediaPipe Face Detection solution instance in the static image mode.
faceDetection =
new FaceDetection(
this,
FaceDetectionOptions.builder()
.setStaticImageMode(true)
.setModelSelection(0)
.setMinDetectionConfidence(0.5f)
.build());
// Connects MediaPipe Face Detection solution to the user-defined FaceDetectionResultImageView.
faceDetection.setResultListener(
faceDetectionResult -> {
logNoseTipKeypoint(faceDetectionResult, /*faceIndex=*/ 0, /*showPixelValues=*/ true);
imageView.setFaceDetectionResult(faceDetectionResult);
runOnUiThread(() -> imageView.update());
});
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
// Updates the preview layout.
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
frameLayout.removeAllViewsInLayout();
imageView.setImageDrawable(null);
frameLayout.addView(imageView);
imageView.setVisibility(View.VISIBLE);
}
/** Sets up the UI components for the video demo. */
private void setupVideoDemoUiComponents() {
// The Intent to access gallery and read a video file.
videoGetter =
registerForActivityResult(
new ActivityResultContracts.StartActivityForResult(),
result -> {
Intent resultIntent = result.getData();
if (resultIntent != null) {
if (result.getResultCode() == RESULT_OK) {
glSurfaceView.post(
() ->
videoInput.start(
this,
resultIntent.getData(),
faceDetection.getGlContext(),
glSurfaceView.getWidth(),
glSurfaceView.getHeight()));
}
}
});
Button loadVideoButton = findViewById(R.id.button_load_video);
loadVideoButton.setOnClickListener(
v -> {
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.VIDEO);
// Reads video from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
});
}
/** Sets up the UI components for the live demo with camera input. */
private void setupLiveDemoUiComponents() {
Button startCameraButton = findViewById(R.id.button_start_camera);
startCameraButton.setOnClickListener(
v -> {
if (inputSource == InputSource.CAMERA) {
return;
}
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.CAMERA);
});
}
/** Sets up core workflow for streaming mode. */
private void setupStreamingModePipeline(InputSource inputSource) {
this.inputSource = inputSource;
// Initializes a new MediaPipe Face Detection solution instance in the streaming mode.
faceDetection =
new FaceDetection(
this,
FaceDetectionOptions.builder().setStaticImageMode(false).setModelSelection(0).build());
faceDetection.setErrorListener(
(message, e) -> Log.e(TAG, "MediaPipe Face Detection error:" + message));
if (inputSource == InputSource.CAMERA) {
cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> faceDetection.send(textureFrame));
} else if (inputSource == InputSource.VIDEO) {
videoInput = new VideoInput(this);
videoInput.setNewFrameListener(textureFrame -> faceDetection.send(textureFrame));
}
// Initializes a new Gl surface view with a user-defined FaceDetectionResultGlRenderer.
glSurfaceView =
new SolutionGlSurfaceView<>(
this, faceDetection.getGlContext(), faceDetection.getGlMajorVersion());
glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
logNoseTipKeypoint(faceDetectionResult, /*faceIndex=*/ 0, /*showPixelValues=*/ false);
glSurfaceView.setRenderData(faceDetectionResult);
glSurfaceView.requestRender();
});
// The runnable to start camera after the gl surface view is attached.
// For video input source, videoInput.start() will be called when the video uri is available.
if (inputSource == InputSource.CAMERA) {
glSurfaceView.post(this::startCamera);
}
// Updates the preview layout.
FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
imageView.setVisibility(View.GONE);
frameLayout.removeAllViewsInLayout();
frameLayout.addView(glSurfaceView);
glSurfaceView.setVisibility(View.VISIBLE);
frameLayout.requestLayout();
}
private void startCamera() {
cameraInput.start(
this,
faceDetection.getGlContext(),
CameraInput.CameraFacing.FRONT,
glSurfaceView.getWidth(),
glSurfaceView.getHeight());
}
private void stopCurrentPipeline() {
if (cameraInput != null) {
cameraInput.setNewFrameListener(null);
cameraInput.close();
}
if (videoInput != null) {
videoInput.setNewFrameListener(null);
videoInput.close();
}
if (glSurfaceView != null) {
glSurfaceView.setVisibility(View.GONE);
}
if (faceDetection != null) {
faceDetection.close();
}
}
private void logNoseTipKeypoint(
FaceDetectionResult result, int faceIndex, boolean showPixelValues) {
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(result, faceIndex, FaceKeypoint.NOSE_TIP);
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
if (showPixelValues) {
int width = result.inputBitmap().getWidth();
int height = result.inputBitmap().getHeight();
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip coordinates (pixel values): x=%f, y=%f",
noseTip.getX() * width, noseTip.getY() * height));
} else {
Log.i(
TAG,
String.format(
"MediaPipe Face Detection nose tip normalized coordinates (value range: [0, 1]):"
+ " x=%f, y=%f",
noseTip.getX(), noseTip.getY()));
}
}
}

View File

@ -0,0 +1 @@
../../../res

View File

@ -31,15 +31,16 @@ dependencies {
implementation 'androidx.appcompat:appcompat:1.3.0' implementation 'androidx.appcompat:appcompat:1.3.0'
implementation 'com.google.android.material:material:1.3.0' implementation 'com.google.android.material:material:1.3.0'
implementation 'androidx.constraintlayout:constraintlayout:2.0.4' implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
implementation 'androidx.exifinterface:exifinterface:1.3.3'
testImplementation 'junit:junit:4.+' testImplementation 'junit:junit:4.+'
androidTestImplementation 'androidx.test.ext:junit:1.1.2' androidTestImplementation 'androidx.test.ext:junit:1.1.2'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
// MediaPipe hands solution API and solution-core. // MediaPipe Face Mesh Solution components.
implementation 'com.google.mediapipe:solution-core:latest.release' implementation 'com.google.mediapipe:solution-core:latest.release'
implementation 'com.google.mediapipe:facemesh:latest.release' implementation 'com.google.mediapipe:facemesh:latest.release'
// MediaPipe deps // MediaPipe deps
implementation 'com.google.flogger:flogger:latest.release' implementation 'com.google.flogger:flogger:0.6'
implementation 'com.google.flogger:flogger-system-backend:latest.release' implementation 'com.google.flogger:flogger-system-backend:0.6'
implementation 'com.google.guava:guava:27.0.1-android' implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4' implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library // CameraX core library

View File

@ -38,6 +38,7 @@ android_binary(
"//third_party:opencv", "//third_party:opencv",
"@maven//:androidx_activity_activity", "@maven//:androidx_activity_activity",
"@maven//:androidx_concurrent_concurrent_futures", "@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_exifinterface_exifinterface",
"@maven//:androidx_fragment_fragment", "@maven//:androidx_fragment_fragment",
"@maven//:com_google_guava_guava", "@maven//:com_google_guava_guava",
], ],

View File

@ -15,11 +15,10 @@
package com.google.mediapipe.examples.facemesh; package com.google.mediapipe.examples.facemesh;
import android.opengl.GLES20; import android.opengl.GLES20;
import android.opengl.Matrix;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.ResultGlBoundary;
import com.google.mediapipe.solutioncore.ResultGlRenderer; import com.google.mediapipe.solutioncore.ResultGlRenderer;
import com.google.mediapipe.solutions.facemesh.FaceMesh;
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections; import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult; import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
@ -27,7 +26,7 @@ import java.nio.ByteOrder;
import java.nio.FloatBuffer; import java.nio.FloatBuffer;
import java.util.List; import java.util.List;
/** A custom implementation of {@link ResultGlRenderer} to render MediaPope FaceMesh results. */ /** A custom implementation of {@link ResultGlRenderer} to render {@link FaceMeshResult}. */
public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult> { public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult> {
private static final String TAG = "FaceMeshResultGlRenderer"; private static final String TAG = "FaceMeshResultGlRenderer";
@ -46,10 +45,10 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
private static final float[] LIPS_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f}; private static final float[] LIPS_COLOR = new float[] {0.9f, 0.9f, 0.9f, 1f};
private static final int LIPS_THICKNESS = 8; private static final int LIPS_THICKNESS = 8;
private static final String VERTEX_SHADER = private static final String VERTEX_SHADER =
"uniform mat4 uTransformMatrix;\n" "uniform mat4 uProjectionMatrix;\n"
+ "attribute vec4 vPosition;\n" + "attribute vec4 vPosition;\n"
+ "void main() {\n" + "void main() {\n"
+ " gl_Position = uTransformMatrix * vPosition;\n" + " gl_Position = uProjectionMatrix * vPosition;\n"
+ "}"; + "}";
private static final String FRAGMENT_SHADER = private static final String FRAGMENT_SHADER =
"precision mediump float;\n" "precision mediump float;\n"
@ -59,9 +58,8 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
+ "}"; + "}";
private int program; private int program;
private int positionHandle; private int positionHandle;
private int transformMatrixHandle; private int projectionMatrixHandle;
private int colorHandle; private int colorHandle;
private final float[] transformMatrix = new float[16];
private int loadShader(int type, String shaderCode) { private int loadShader(int type, String shaderCode) {
int shader = GLES20.glCreateShader(type); int shader = GLES20.glCreateShader(type);
@ -79,28 +77,17 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
GLES20.glAttachShader(program, fragmentShader); GLES20.glAttachShader(program, fragmentShader);
GLES20.glLinkProgram(program); GLES20.glLinkProgram(program);
positionHandle = GLES20.glGetAttribLocation(program, "vPosition"); positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
transformMatrixHandle = GLES20.glGetUniformLocation(program, "uTransformMatrix"); projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
colorHandle = GLES20.glGetUniformLocation(program, "uColor"); colorHandle = GLES20.glGetUniformLocation(program, "uColor");
} }
@Override @Override
public void renderResult(FaceMeshResult result, ResultGlBoundary boundary) { public void renderResult(FaceMeshResult result, float[] projectionMatrix) {
if (result == null) { if (result == null) {
return; return;
} }
GLES20.glUseProgram(program); GLES20.glUseProgram(program);
// Sets the transform matrix to align the result rendering with the scaled output texture. GLES20.glUniformMatrix4fv(projectionMatrixHandle, 1, false, projectionMatrix, 0);
// Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
// bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
// top-left corner.
Matrix.setIdentityM(transformMatrix, 0);
Matrix.scaleM(
transformMatrix,
0,
2 / (boundary.right() - boundary.left()),
-2 / (boundary.top() - boundary.bottom()),
1.0f);
GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
int numFaces = result.multiFaceLandmarks().size(); int numFaces = result.multiFaceLandmarks().size();
for (int i = 0; i < numFaces; ++i) { for (int i = 0; i < numFaces; ++i) {
@ -126,7 +113,7 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
LEFT_EYE_THICKNESS); LEFT_EYE_THICKNESS);
drawLandmarks( drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(), result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_EYEBR0W, FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
LEFT_EYEBROW_COLOR, LEFT_EYEBROW_COLOR,
LEFT_EYEBROW_THICKNESS); LEFT_EYEBROW_THICKNESS);
drawLandmarks( drawLandmarks(
@ -139,11 +126,24 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
FaceMeshConnections.FACEMESH_LIPS, FaceMeshConnections.FACEMESH_LIPS,
LIPS_COLOR, LIPS_COLOR,
LIPS_THICKNESS); LIPS_THICKNESS);
if (result.multiFaceLandmarks().get(i).getLandmarkCount()
== FaceMesh.FACEMESH_NUM_LANDMARKS_WITH_IRISES) {
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_RIGHT_IRIS,
RIGHT_EYE_COLOR,
RIGHT_EYE_THICKNESS);
drawLandmarks(
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_IRIS,
LEFT_EYE_COLOR,
LEFT_EYE_THICKNESS);
}
} }
} }
/** /**
* Calls this to delete the shader program. * Deletes the shader program.
* *
* <p>This is only necessary if one wants to release the program while keeping the context around. * <p>This is only necessary if one wants to release the program while keeping the context around.
*/ */
@ -159,13 +159,9 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0); GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
GLES20.glLineWidth(thickness); GLES20.glLineWidth(thickness);
for (FaceMeshConnections.Connection c : connections) { for (FaceMeshConnections.Connection c : connections) {
float[] vertex = new float[4];
NormalizedLandmark start = faceLandmarkList.get(c.start()); NormalizedLandmark start = faceLandmarkList.get(c.start());
vertex[0] = normalizedLandmarkValue(start.getX());
vertex[1] = normalizedLandmarkValue(start.getY());
NormalizedLandmark end = faceLandmarkList.get(c.end()); NormalizedLandmark end = faceLandmarkList.get(c.end());
vertex[2] = normalizedLandmarkValue(end.getX()); float[] vertex = {start.getX(), start.getY(), end.getX(), end.getY()};
vertex[3] = normalizedLandmarkValue(end.getY());
FloatBuffer vertexBuffer = FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertex.length * 4) ByteBuffer.allocateDirect(vertex.length * 4)
.order(ByteOrder.nativeOrder()) .order(ByteOrder.nativeOrder())
@ -177,10 +173,4 @@ public class FaceMeshResultGlRenderer implements ResultGlRenderer<FaceMeshResult
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2); GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
} }
} }
// Normalizes the value from the landmark value range:[0, 1] to the standard OpenGL coordinate
// value range: [-1, 1].
private float normalizedLandmarkValue(float value) {
return value * 2 - 1;
}
} }

View File

@ -24,11 +24,12 @@ import androidx.appcompat.widget.AppCompatImageView;
import android.util.Size; import android.util.Size;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutions.facemesh.FaceMesh;
import com.google.mediapipe.solutions.facemesh.FaceMeshConnections; import com.google.mediapipe.solutions.facemesh.FaceMeshConnections;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult; import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.util.List; import java.util.List;
/** An ImageView implementation for displaying MediaPipe FaceMesh results. */ /** An ImageView implementation for displaying {@link FaceMeshResult}. */
public class FaceMeshResultImageView extends AppCompatImageView { public class FaceMeshResultImageView extends AppCompatImageView {
private static final String TAG = "FaceMeshResultImageView"; private static final String TAG = "FaceMeshResultImageView";
@ -103,7 +104,7 @@ public class FaceMeshResultImageView extends AppCompatImageView {
drawLandmarksOnCanvas( drawLandmarksOnCanvas(
canvas, canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(), result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_EYEBR0W, FaceMeshConnections.FACEMESH_LEFT_EYEBROW,
imageSize, imageSize,
LEFT_EYEBROW_COLOR, LEFT_EYEBROW_COLOR,
LEFT_EYEBROW_THICKNESS); LEFT_EYEBROW_THICKNESS);
@ -121,10 +122,27 @@ public class FaceMeshResultImageView extends AppCompatImageView {
imageSize, imageSize,
LIPS_COLOR, LIPS_COLOR,
LIPS_THICKNESS); LIPS_THICKNESS);
if (result.multiFaceLandmarks().get(i).getLandmarkCount()
== FaceMesh.FACEMESH_NUM_LANDMARKS_WITH_IRISES) {
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_RIGHT_IRIS,
imageSize,
RIGHT_EYE_COLOR,
RIGHT_EYE_THICKNESS);
drawLandmarksOnCanvas(
canvas,
result.multiFaceLandmarks().get(i).getLandmarkList(),
FaceMeshConnections.FACEMESH_LEFT_IRIS,
imageSize,
LEFT_EYE_COLOR,
LEFT_EYE_THICKNESS);
}
} }
} }
/** Updates the image view with the latest facemesh result. */ /** Updates the image view with the latest {@link FaceMeshResult}. */
public void update() { public void update() {
postInvalidate(); postInvalidate();
if (latest != null) { if (latest != null) {
@ -132,7 +150,6 @@ public class FaceMeshResultImageView extends AppCompatImageView {
} }
} }
// TODO: Better hand landmark and hand connection drawing.
private void drawLandmarksOnCanvas( private void drawLandmarksOnCanvas(
Canvas canvas, Canvas canvas,
List<NormalizedLandmark> faceLandmarkList, List<NormalizedLandmark> faceLandmarkList,

View File

@ -16,6 +16,7 @@ package com.google.mediapipe.examples.facemesh;
import android.content.Intent; import android.content.Intent;
import android.graphics.Bitmap; import android.graphics.Bitmap;
import android.graphics.Matrix;
import android.os.Bundle; import android.os.Bundle;
import android.provider.MediaStore; import android.provider.MediaStore;
import androidx.appcompat.app.AppCompatActivity; import androidx.appcompat.app.AppCompatActivity;
@ -25,6 +26,8 @@ import android.widget.Button;
import android.widget.FrameLayout; import android.widget.FrameLayout;
import androidx.activity.result.ActivityResultLauncher; import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts; import androidx.activity.result.contract.ActivityResultContracts;
import androidx.exifinterface.media.ExifInterface;
// ContentResolver dependency
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.CameraInput; import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView; import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
@ -33,8 +36,9 @@ import com.google.mediapipe.solutions.facemesh.FaceMesh;
import com.google.mediapipe.solutions.facemesh.FaceMeshOptions; import com.google.mediapipe.solutions.facemesh.FaceMeshOptions;
import com.google.mediapipe.solutions.facemesh.FaceMeshResult; import com.google.mediapipe.solutions.facemesh.FaceMeshResult;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
/** Main activity of MediaPipe FaceMesh app. */ /** Main activity of MediaPipe Face Mesh app. */
public class MainActivity extends AppCompatActivity { public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity"; private static final String TAG = "MainActivity";
@ -57,12 +61,14 @@ public class MainActivity extends AppCompatActivity {
private ActivityResultLauncher<Intent> videoGetter; private ActivityResultLauncher<Intent> videoGetter;
// Live camera demo UI and camera components. // Live camera demo UI and camera components.
private CameraInput cameraInput; private CameraInput cameraInput;
private SolutionGlSurfaceView<FaceMeshResult> glSurfaceView; private SolutionGlSurfaceView<FaceMeshResult> glSurfaceView;
@Override @Override
protected void onCreate(Bundle savedInstanceState) { protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState); super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main); setContentView(R.layout.activity_main);
// TODO: Add a toggle to switch between the original face mesh and attention mesh.
setupStaticImageDemoUiComponents(); setupStaticImageDemoUiComponents();
setupVideoDemoUiComponents(); setupVideoDemoUiComponents();
setupLiveDemoUiComponents(); setupLiveDemoUiComponents();
@ -111,6 +117,35 @@ public class MainActivity extends AppCompatActivity {
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e); Log.e(TAG, "Bitmap reading error:" + e);
} }
try {
InputStream imageData =
this.getContentResolver().openInputStream(resultIntent.getData());
int orientation =
new ExifInterface(imageData)
.getAttributeInt(
ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);
if (orientation != ExifInterface.ORIENTATION_NORMAL) {
Matrix matrix = new Matrix();
switch (orientation) {
case ExifInterface.ORIENTATION_ROTATE_90:
matrix.postRotate(90);
break;
case ExifInterface.ORIENTATION_ROTATE_180:
matrix.postRotate(180);
break;
case ExifInterface.ORIENTATION_ROTATE_270:
matrix.postRotate(270);
break;
default:
matrix.postRotate(0);
}
bitmap =
Bitmap.createBitmap(
bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
}
} catch (IOException e) {
Log.e(TAG, "Bitmap rotation error:" + e);
}
if (bitmap != null) { if (bitmap != null) {
facemesh.send(bitmap); facemesh.send(bitmap);
} }
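The EXIF block added above reads the stored orientation from the image stream and rotates the decoded bitmap before it is sent to the solution. A minimal, reusable sketch of the same logic (the `BitmapRotationHelper` class and `rotateByExif` method names are illustrative, not part of the example app):

```
import android.graphics.Bitmap;
import android.graphics.Matrix;
import androidx.exifinterface.media.ExifInterface;
import java.io.IOException;
import java.io.InputStream;

/** Rotates a decoded bitmap to match the EXIF orientation stored in the original image stream. */
final class BitmapRotationHelper {
  private BitmapRotationHelper() {}

  static Bitmap rotateByExif(Bitmap bitmap, InputStream imageData) throws IOException {
    int orientation =
        new ExifInterface(imageData)
            .getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);
    if (orientation == ExifInterface.ORIENTATION_NORMAL) {
      return bitmap;
    }
    Matrix matrix = new Matrix();
    switch (orientation) {
      case ExifInterface.ORIENTATION_ROTATE_90:
        matrix.postRotate(90);
        break;
      case ExifInterface.ORIENTATION_ROTATE_180:
        matrix.postRotate(180);
        break;
      case ExifInterface.ORIENTATION_ROTATE_270:
        matrix.postRotate(270);
        break;
      default:
        // Other orientation values (e.g. mirrored) are returned unrotated, as in the example above.
        return bitmap;
    }
    return Bitmap.createBitmap(bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
  }
}
```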
@ -132,26 +167,27 @@ public class MainActivity extends AppCompatActivity {
imageView = new FaceMeshResultImageView(this); imageView = new FaceMeshResultImageView(this);
} }
/** The core MediaPipe FaceMesh setup workflow for its static image mode. */ /** Sets up core workflow for static image mode. */
private void setupStaticImageModePipeline() { private void setupStaticImageModePipeline() {
this.inputSource = InputSource.IMAGE; this.inputSource = InputSource.IMAGE;
// Initializes a new MediaPipe FaceMesh instance in the static image mode. // Initializes a new MediaPipe Face Mesh solution instance in the static image mode.
facemesh = facemesh =
new FaceMesh( new FaceMesh(
this, this,
FaceMeshOptions.builder() FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STATIC_IMAGE_MODE) .setStaticImageMode(true)
.setRefineLandmarks(true)
.setRunOnGpu(RUN_ON_GPU) .setRunOnGpu(RUN_ON_GPU)
.build()); .build());
// Connects MediaPipe FaceMesh to the user-defined FaceMeshResultImageView. // Connects MediaPipe Face Mesh solution to the user-defined FaceMeshResultImageView.
facemesh.setResultListener( facemesh.setResultListener(
faceMeshResult -> { faceMeshResult -> {
logNoseLandmark(faceMeshResult, /*showPixelValues=*/ true); logNoseLandmark(faceMeshResult, /*showPixelValues=*/ true);
imageView.setFaceMeshResult(faceMeshResult); imageView.setFaceMeshResult(faceMeshResult);
runOnUiThread(() -> imageView.update()); runOnUiThread(() -> imageView.update());
}); });
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message)); facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
// Updates the preview layout. // Updates the preview layout.
FrameLayout frameLayout = findViewById(R.id.preview_display_layout); FrameLayout frameLayout = findViewById(R.id.preview_display_layout);
@ -207,25 +243,24 @@ public class MainActivity extends AppCompatActivity {
}); });
} }
/** The core MediaPipe FaceMesh setup workflow for its streaming mode. */ /** Sets up core workflow for streaming mode. */
private void setupStreamingModePipeline(InputSource inputSource) { private void setupStreamingModePipeline(InputSource inputSource) {
this.inputSource = inputSource; this.inputSource = inputSource;
// Initializes a new MediaPipe FaceMesh instance in the streaming mode. // Initializes a new MediaPipe Face Mesh solution instance in the streaming mode.
facemesh = facemesh =
new FaceMesh( new FaceMesh(
this, this,
FaceMeshOptions.builder() FaceMeshOptions.builder()
.setMode(FaceMeshOptions.STREAMING_MODE) .setStaticImageMode(false)
.setRefineLandmarks(true)
.setRunOnGpu(RUN_ON_GPU) .setRunOnGpu(RUN_ON_GPU)
.build()); .build());
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe FaceMesh error:" + message)); facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
if (inputSource == InputSource.CAMERA) { if (inputSource == InputSource.CAMERA) {
// Initializes a new CameraInput instance and connects it to MediaPipe FaceMesh.
cameraInput = new CameraInput(this); cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame)); cameraInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
} else if (inputSource == InputSource.VIDEO) { } else if (inputSource == InputSource.VIDEO) {
// Initializes a new VideoInput instance and connects it to MediaPipe FaceMesh.
videoInput = new VideoInput(this); videoInput = new VideoInput(this);
videoInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame)); videoInput.setNewFrameListener(textureFrame -> facemesh.send(textureFrame));
} }
@ -295,13 +330,13 @@ public class MainActivity extends AppCompatActivity {
Log.i( Log.i(
TAG, TAG,
String.format( String.format(
"MediaPipe FaceMesh nose coordinates (pixel values): x=%f, y=%f", "MediaPipe Face Mesh nose coordinates (pixel values): x=%f, y=%f",
noseLandmark.getX() * width, noseLandmark.getY() * height)); noseLandmark.getX() * width, noseLandmark.getY() * height));
} else { } else {
Log.i( Log.i(
TAG, TAG,
String.format( String.format(
"MediaPipe FaceMesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f", "MediaPipe Face Mesh nose normalized coordinates (value range: [0, 1]): x=%f, y=%f",
noseLandmark.getX(), noseLandmark.getY())); noseLandmark.getX(), noseLandmark.getY()));
} }
} }
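Taken together, the hunks above replace the removed `STATIC_IMAGE_MODE`/`STREAMING_MODE` constants with a boolean static-image flag on `FaceMeshOptions` and enable the refined (iris) landmarks. A condensed sketch of the static-image configuration, assuming the surrounding activity's `imageView`, `bitmap`, `TAG`, and `RUN_ON_GPU` members:

```
FaceMesh facemesh =
    new FaceMesh(
        this,
        FaceMeshOptions.builder()
            .setStaticImageMode(true)   // replaces .setMode(FaceMeshOptions.STATIC_IMAGE_MODE)
            .setRefineLandmarks(true)   // enables the attention model with iris landmarks
            .setRunOnGpu(RUN_ON_GPU)
            .build());
facemesh.setResultListener(
    faceMeshResult -> {
      imageView.setFaceMeshResult(faceMeshResult);
      runOnUiThread(() -> imageView.update());
    });
facemesh.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Face Mesh error:" + message));
facemesh.send(bitmap);
```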

View File

@ -31,15 +31,16 @@ dependencies {
implementation 'androidx.appcompat:appcompat:1.3.0' implementation 'androidx.appcompat:appcompat:1.3.0'
implementation 'com.google.android.material:material:1.3.0' implementation 'com.google.android.material:material:1.3.0'
implementation 'androidx.constraintlayout:constraintlayout:2.0.4' implementation 'androidx.constraintlayout:constraintlayout:2.0.4'
implementation 'androidx.exifinterface:exifinterface:1.3.3'
testImplementation 'junit:junit:4.+' testImplementation 'junit:junit:4.+'
androidTestImplementation 'androidx.test.ext:junit:1.1.2' androidTestImplementation 'androidx.test.ext:junit:1.1.2'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
// MediaPipe hands solution API and solution-core. // MediaPipe Hands Solution components.
implementation 'com.google.mediapipe:solution-core:latest.release' implementation 'com.google.mediapipe:solution-core:latest.release'
implementation 'com.google.mediapipe:hands:latest.release' implementation 'com.google.mediapipe:hands:latest.release'
// MediaPipe deps // MediaPipe deps
implementation 'com.google.flogger:flogger:latest.release' implementation 'com.google.flogger:flogger:0.6'
implementation 'com.google.flogger:flogger-system-backend:latest.release' implementation 'com.google.flogger:flogger-system-backend:0.6'
implementation 'com.google.guava:guava:27.0.1-android' implementation 'com.google.guava:guava:27.0.1-android'
implementation 'com.google.protobuf:protobuf-java:3.11.4' implementation 'com.google.protobuf:protobuf-java:3.11.4'
// CameraX core library // CameraX core library

View File

@ -38,6 +38,7 @@ android_binary(
"//third_party:opencv", "//third_party:opencv",
"@maven//:androidx_activity_activity", "@maven//:androidx_activity_activity",
"@maven//:androidx_concurrent_concurrent_futures", "@maven//:androidx_concurrent_concurrent_futures",
"@maven//:androidx_exifinterface_exifinterface",
"@maven//:androidx_fragment_fragment", "@maven//:androidx_fragment_fragment",
"@maven//:com_google_guava_guava", "@maven//:com_google_guava_guava",
], ],

View File

@ -15,9 +15,7 @@
package com.google.mediapipe.examples.hands; package com.google.mediapipe.examples.hands;
import android.opengl.GLES20; import android.opengl.GLES20;
import android.opengl.Matrix;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.ResultGlBoundary;
import com.google.mediapipe.solutioncore.ResultGlRenderer; import com.google.mediapipe.solutioncore.ResultGlRenderer;
import com.google.mediapipe.solutions.hands.Hands; import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsResult; import com.google.mediapipe.solutions.hands.HandsResult;
@ -26,16 +24,16 @@ import java.nio.ByteOrder;
import java.nio.FloatBuffer; import java.nio.FloatBuffer;
import java.util.List; import java.util.List;
/** A custom implementation of {@link ResultGlRenderer} to render MediaPope Hands results. */ /** A custom implementation of {@link ResultGlRenderer} to render {@link HandsResult}. */
public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> { public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
private static final String TAG = "HandsResultGlRenderer"; private static final String TAG = "HandsResultGlRenderer";
private static final float CONNECTION_THICKNESS = 20.0f; private static final float CONNECTION_THICKNESS = 20.0f;
private static final String VERTEX_SHADER = private static final String VERTEX_SHADER =
"uniform mat4 uTransformMatrix;\n" "uniform mat4 uProjectionMatrix;\n"
+ "attribute vec4 vPosition;\n" + "attribute vec4 vPosition;\n"
+ "void main() {\n" + "void main() {\n"
+ " gl_Position = uTransformMatrix * vPosition;\n" + " gl_Position = uProjectionMatrix * vPosition;\n"
+ "}"; + "}";
private static final String FRAGMENT_SHADER = private static final String FRAGMENT_SHADER =
"precision mediump float;\n" "precision mediump float;\n"
@ -44,8 +42,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
+ "}"; + "}";
private int program; private int program;
private int positionHandle; private int positionHandle;
private int transformMatrixHandle; private int projectionMatrixHandle;
private final float[] transformMatrix = new float[16];
private int loadShader(int type, String shaderCode) { private int loadShader(int type, String shaderCode) {
int shader = GLES20.glCreateShader(type); int shader = GLES20.glCreateShader(type);
@ -63,27 +60,16 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
GLES20.glAttachShader(program, fragmentShader); GLES20.glAttachShader(program, fragmentShader);
GLES20.glLinkProgram(program); GLES20.glLinkProgram(program);
positionHandle = GLES20.glGetAttribLocation(program, "vPosition"); positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
transformMatrixHandle = GLES20.glGetUniformLocation(program, "uTransformMatrix"); projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
} }
@Override @Override
public void renderResult(HandsResult result, ResultGlBoundary boundary) { public void renderResult(HandsResult result, float[] projectionMatrix) {
if (result == null) { if (result == null) {
return; return;
} }
GLES20.glUseProgram(program); GLES20.glUseProgram(program);
// Sets the transform matrix to align the result rendering with the scaled output texture. GLES20.glUniformMatrix4fv(projectionMatrixHandle, 1, false, projectionMatrix, 0);
// Also flips the rendering vertically since OpenGL assumes the coordinate origin is at the
// bottom-left corner, whereas MediaPipe landmark data assumes the coordinate origin is at the
// top-left corner.
Matrix.setIdentityM(transformMatrix, 0);
Matrix.scaleM(
transformMatrix,
0,
2 / (boundary.right() - boundary.left()),
-2 / (boundary.top() - boundary.bottom()),
1.0f);
GLES20.glUniformMatrix4fv(transformMatrixHandle, 1, false, transformMatrix, 0);
GLES20.glLineWidth(CONNECTION_THICKNESS); GLES20.glLineWidth(CONNECTION_THICKNESS);
int numHands = result.multiHandLandmarks().size(); int numHands = result.multiHandLandmarks().size();
@ -93,7 +79,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
} }
/** /**
* Calls this to delete the shader program. * Deletes the shader program.
* *
* <p>This is only necessary if one wants to release the program while keeping the context around. * <p>This is only necessary if one wants to release the program while keeping the context around.
*/ */
@ -101,16 +87,11 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
GLES20.glDeleteProgram(program); GLES20.glDeleteProgram(program);
} }
// TODO: Better hand landmark and hand connection drawing.
private void drawLandmarks(List<NormalizedLandmark> handLandmarkList) { private void drawLandmarks(List<NormalizedLandmark> handLandmarkList) {
for (Hands.Connection c : Hands.HAND_CONNECTIONS) { for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
float[] vertex = new float[4];
NormalizedLandmark start = handLandmarkList.get(c.start()); NormalizedLandmark start = handLandmarkList.get(c.start());
vertex[0] = normalizedLandmarkValue(start.getX());
vertex[1] = normalizedLandmarkValue(start.getY());
NormalizedLandmark end = handLandmarkList.get(c.end()); NormalizedLandmark end = handLandmarkList.get(c.end());
vertex[2] = normalizedLandmarkValue(end.getX()); float[] vertex = {start.getX(), start.getY(), end.getX(), end.getY()};
vertex[3] = normalizedLandmarkValue(end.getY());
FloatBuffer vertexBuffer = FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertex.length * 4) ByteBuffer.allocateDirect(vertex.length * 4)
.order(ByteOrder.nativeOrder()) .order(ByteOrder.nativeOrder())
@ -122,10 +103,4 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2); GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
} }
} }
// Normalizes the value from the landmark value range:[0, 1] to the standard OpenGL coordinate
// value range: [-1, 1].
private float normalizedLandmarkValue(float value) {
return value * 2 - 1;
}
} }
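With the new `renderResult(HandsResult result, float[] projectionMatrix)` signature, the renderer uploads the supplied matrix via `glUniformMatrix4fv` and feeds raw normalized landmark coordinates, so the old per-vertex mapping to [-1, 1] is no longer needed. A small self-contained sketch of the vertex-buffer packing used for each connection (`GlVertexUtil` and `connectionVertices` are illustrative names):

```
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;

final class GlVertexUtil {
  private GlVertexUtil() {}

  /**
   * Packs one landmark connection (start/end, normalized [0, 1] coordinates) into a direct
   * FloatBuffer. The uProjectionMatrix uniform maps these coordinates to clip space, so no
   * per-vertex normalization is performed here.
   */
  static FloatBuffer connectionVertices(float startX, float startY, float endX, float endY) {
    float[] vertex = {startX, startY, endX, endY};
    FloatBuffer vertexBuffer =
        ByteBuffer.allocateDirect(vertex.length * 4) // 4 bytes per float
            .order(ByteOrder.nativeOrder())
            .asFloatBuffer();
    vertexBuffer.put(vertex);
    vertexBuffer.position(0);
    return vertexBuffer;
  }
}
```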

View File

@ -27,7 +27,7 @@ import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsResult; import com.google.mediapipe.solutions.hands.HandsResult;
import java.util.List; import java.util.List;
/** An ImageView implementation for displaying MediaPipe Hands results. */ /** An ImageView implementation for displaying {@link HandsResult}. */
public class HandsResultImageView extends AppCompatImageView { public class HandsResultImageView extends AppCompatImageView {
private static final String TAG = "HandsResultImageView"; private static final String TAG = "HandsResultImageView";
@ -66,7 +66,7 @@ public class HandsResultImageView extends AppCompatImageView {
} }
} }
/** Updates the image view with the latest hands result. */ /** Updates the image view with the latest {@link HandsResult}. */
public void update() { public void update() {
postInvalidate(); postInvalidate();
if (latest != null) { if (latest != null) {
@ -74,7 +74,6 @@ public class HandsResultImageView extends AppCompatImageView {
} }
} }
// TODO: Better hand landmark and hand connection drawing.
private void drawLandmarksOnCanvas( private void drawLandmarksOnCanvas(
List<NormalizedLandmark> handLandmarkList, Canvas canvas, int width, int height) { List<NormalizedLandmark> handLandmarkList, Canvas canvas, int width, int height) {
// Draw connections. // Draw connections.

View File

@ -16,6 +16,7 @@ package com.google.mediapipe.examples.hands;
import android.content.Intent; import android.content.Intent;
import android.graphics.Bitmap; import android.graphics.Bitmap;
import android.graphics.Matrix;
import android.os.Bundle; import android.os.Bundle;
import android.provider.MediaStore; import android.provider.MediaStore;
import androidx.appcompat.app.AppCompatActivity; import androidx.appcompat.app.AppCompatActivity;
@ -25,6 +26,8 @@ import android.widget.Button;
import android.widget.FrameLayout; import android.widget.FrameLayout;
import androidx.activity.result.ActivityResultLauncher; import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts; import androidx.activity.result.contract.ActivityResultContracts;
import androidx.exifinterface.media.ExifInterface;
// ContentResolver dependency
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.CameraInput; import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView; import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
@ -34,6 +37,7 @@ import com.google.mediapipe.solutions.hands.Hands;
import com.google.mediapipe.solutions.hands.HandsOptions; import com.google.mediapipe.solutions.hands.HandsOptions;
import com.google.mediapipe.solutions.hands.HandsResult; import com.google.mediapipe.solutions.hands.HandsResult;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
/** Main activity of MediaPipe Hands app. */ /** Main activity of MediaPipe Hands app. */
public class MainActivity extends AppCompatActivity { public class MainActivity extends AppCompatActivity {
@ -59,6 +63,7 @@ public class MainActivity extends AppCompatActivity {
private ActivityResultLauncher<Intent> videoGetter; private ActivityResultLauncher<Intent> videoGetter;
// Live camera demo UI and camera components. // Live camera demo UI and camera components.
private CameraInput cameraInput; private CameraInput cameraInput;
private SolutionGlSurfaceView<HandsResult> glSurfaceView; private SolutionGlSurfaceView<HandsResult> glSurfaceView;
@Override @Override
@ -113,6 +118,35 @@ public class MainActivity extends AppCompatActivity {
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "Bitmap reading error:" + e); Log.e(TAG, "Bitmap reading error:" + e);
} }
try {
InputStream imageData =
this.getContentResolver().openInputStream(resultIntent.getData());
int orientation =
new ExifInterface(imageData)
.getAttributeInt(
ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);
if (orientation != ExifInterface.ORIENTATION_NORMAL) {
Matrix matrix = new Matrix();
switch (orientation) {
case ExifInterface.ORIENTATION_ROTATE_90:
matrix.postRotate(90);
break;
case ExifInterface.ORIENTATION_ROTATE_180:
matrix.postRotate(180);
break;
case ExifInterface.ORIENTATION_ROTATE_270:
matrix.postRotate(270);
break;
default:
matrix.postRotate(0);
}
bitmap =
Bitmap.createBitmap(
bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);
}
} catch (IOException e) {
Log.e(TAG, "Bitmap rotation error:" + e);
}
if (bitmap != null) { if (bitmap != null) {
hands.send(bitmap); hands.send(bitmap);
} }
@ -134,20 +168,20 @@ public class MainActivity extends AppCompatActivity {
imageView = new HandsResultImageView(this); imageView = new HandsResultImageView(this);
} }
/** The core MediaPipe Hands setup workflow for its static image mode. */ /** Sets up core workflow for static image mode. */
private void setupStaticImageModePipeline() { private void setupStaticImageModePipeline() {
this.inputSource = InputSource.IMAGE; this.inputSource = InputSource.IMAGE;
// Initializes a new MediaPipe Hands instance in the static image mode. // Initializes a new MediaPipe Hands solution instance in the static image mode.
hands = hands =
new Hands( new Hands(
this, this,
HandsOptions.builder() HandsOptions.builder()
.setMode(HandsOptions.STATIC_IMAGE_MODE) .setStaticImageMode(true)
.setMaxNumHands(1) .setMaxNumHands(1)
.setRunOnGpu(RUN_ON_GPU) .setRunOnGpu(RUN_ON_GPU)
.build()); .build());
// Connects MediaPipe Hands to the user-defined HandsResultImageView. // Connects MediaPipe Hands solution to the user-defined HandsResultImageView.
hands.setResultListener( hands.setResultListener(
handsResult -> { handsResult -> {
logWristLandmark(handsResult, /*showPixelValues=*/ true); logWristLandmark(handsResult, /*showPixelValues=*/ true);
@ -210,26 +244,24 @@ public class MainActivity extends AppCompatActivity {
}); });
} }
/** The core MediaPipe Hands setup workflow for its streaming mode. */ /** Sets up core workflow for streaming mode. */
private void setupStreamingModePipeline(InputSource inputSource) { private void setupStreamingModePipeline(InputSource inputSource) {
this.inputSource = inputSource; this.inputSource = inputSource;
// Initializes a new MediaPipe Hands instance in the streaming mode. // Initializes a new MediaPipe Hands solution instance in the streaming mode.
hands = hands =
new Hands( new Hands(
this, this,
HandsOptions.builder() HandsOptions.builder()
.setMode(HandsOptions.STREAMING_MODE) .setStaticImageMode(false)
.setMaxNumHands(1) .setMaxNumHands(1)
.setRunOnGpu(RUN_ON_GPU) .setRunOnGpu(RUN_ON_GPU)
.build()); .build());
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message)); hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
if (inputSource == InputSource.CAMERA) { if (inputSource == InputSource.CAMERA) {
// Initializes a new CameraInput instance and connects it to MediaPipe Hands.
cameraInput = new CameraInput(this); cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame)); cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
} else if (inputSource == InputSource.VIDEO) { } else if (inputSource == InputSource.VIDEO) {
// Initializes a new VideoInput instance and connects it to MediaPipe Hands.
videoInput = new VideoInput(this); videoInput = new VideoInput(this);
videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame)); videoInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
} }
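The Hands example gets the same treatment: the boolean static-image flag replaces the mode constant, and camera frames are forwarded to the solution through `CameraInput`. A condensed sketch of the streaming wiring shown above, assuming the activity context and the example's `TAG` and `RUN_ON_GPU` constants:

```
Hands hands =
    new Hands(
        this,
        HandsOptions.builder()
            .setStaticImageMode(false)  // replaces .setMode(HandsOptions.STREAMING_MODE)
            .setMaxNumHands(1)
            .setRunOnGpu(RUN_ON_GPU)
            .build());
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));

// Camera frames are forwarded to the solution as texture frames.
CameraInput cameraInput = new CameraInput(this);
cameraInput.setNewFrameListener(textureFrame -> hands.send(textureFrame));
```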

View File

@ -1,3 +1,4 @@
rootProject.name = "mediapipe-solutions-examples" rootProject.name = "mediapipe-solutions-examples"
include ':hands' include ':facedetection'
include ':facemesh' include ':facemesh'
include ':hands'

View File

@ -37,7 +37,7 @@ android_binary(
srcs = glob(["*.java"]), srcs = glob(["*.java"]),
assets = [ assets = [
"//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb", "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb",
"//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite", "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
], ],
assets_dir = "", assets_dir = "",

View File

@ -63,7 +63,7 @@ objc_library(
data = [ data = [
"//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb", "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite", "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
], ],
deps = [ deps = [
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary", "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@ -23,6 +23,7 @@
#if !MEDIAPIPE_DISABLE_GPU #if !MEDIAPIPE_DISABLE_GPU
#ifdef __APPLE__ #ifdef __APPLE__
#include "mediapipe/objc/CFHolder.h" #include "mediapipe/objc/CFHolder.h"
#include "mediapipe/objc/util.h"
#endif // __APPLE__ #endif // __APPLE__
#endif // !MEDIAPIPE_DISABLE_GPU #endif // !MEDIAPIPE_DISABLE_GPU

View File

@ -89,6 +89,18 @@ cc_library(
], ],
) )
cc_library(
name = "commandlineflags",
hdrs = [
"commandlineflags.h",
],
visibility = ["//visibility:public"],
deps = [
"//third_party:glog",
"@com_google_absl//absl/flags:flag",
],
)
cc_library( cc_library(
name = "core_proto", name = "core_proto",
hdrs = [ hdrs = [

View File

@ -0,0 +1,30 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_PORT_COMMANDLINEFLAGS_H_
#define MEDIAPIPE_PORT_COMMANDLINEFLAGS_H_
#include "gflags/gflags.h"
namespace absl {
template <typename T>
T GetFlag(const T& f) {
return f;
}
template <typename T, typename U>
void SetFlag(T* f, const U& u) {
*f = u;
}
} // namespace absl
#endif // MEDIAPIPE_PORT_COMMANDLINEFLAGS_H_

View File

@ -202,6 +202,7 @@ cc_library(
"//mediapipe/framework:packet_type", "//mediapipe/framework:packet_type",
"//mediapipe/framework/port:advanced_proto", "//mediapipe/framework/port:advanced_proto",
"//mediapipe/framework/port:any_proto", "//mediapipe/framework/port:any_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status", "//mediapipe/framework/port:status",
"@com_google_absl//absl/status", "@com_google_absl//absl/status",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",
@ -291,7 +292,9 @@ mediapipe_cc_test(
data = [":node_chain_subgraph.proto"], data = [":node_chain_subgraph.proto"],
requires_full_emulation = False, requires_full_emulation = False,
deps = [ deps = [
":options_field_util",
":options_registry", ":options_registry",
":options_syntax_util",
":options_util", ":options_util",
"//mediapipe/calculators/core:flow_limiter_calculator", "//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator_cc_proto", "//mediapipe/calculators/core:flow_limiter_calculator_cc_proto",
@ -305,8 +308,8 @@ mediapipe_cc_test(
"//mediapipe/framework/port:status", "//mediapipe/framework/port:status",
"//mediapipe/framework/testdata:night_light_calculator_options_lib", "//mediapipe/framework/testdata:night_light_calculator_options_lib",
"//mediapipe/framework/tool:node_chain_subgraph_options_lib", "//mediapipe/framework/tool:node_chain_subgraph_options_lib",
"//mediapipe/framework/tool:options_syntax_util",
"//mediapipe/util:header_util", "//mediapipe/util:header_util",
"@com_google_absl//absl/strings",
], ],
) )

View File

@ -8,11 +8,13 @@
#include "absl/status/status.h" #include "absl/status/status.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/packet.h" #include "mediapipe/framework/packet.h"
#include "mediapipe/framework/packet_type.h" #include "mediapipe/framework/packet_type.h"
#include "mediapipe/framework/port/advanced_proto_inc.h" #include "mediapipe/framework/port/advanced_proto_inc.h"
#include "mediapipe/framework/port/any_proto.h" #include "mediapipe/framework/port/any_proto.h"
#include "mediapipe/framework/port/canonical_errors.h" #include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/name_util.h" #include "mediapipe/framework/tool/name_util.h"
#include "mediapipe/framework/tool/proto_util_lite.h" #include "mediapipe/framework/tool/proto_util_lite.h"
@ -31,6 +33,9 @@ using ::mediapipe::proto_ns::io::StringOutputStream;
// Utility functions for OptionsFieldUtil. // Utility functions for OptionsFieldUtil.
namespace { namespace {
// The type name for the proto3 "Any" type.
constexpr absl::string_view kGoogleProtobufAny = "google.protobuf.Any";
// Converts a FieldDescriptor::Type to the corresponding FieldType. // Converts a FieldDescriptor::Type to the corresponding FieldType.
FieldType AsFieldType(proto_ns::FieldDescriptorProto::Type type) { FieldType AsFieldType(proto_ns::FieldDescriptorProto::Type type) {
return static_cast<FieldType>(type); return static_cast<FieldType>(type);
@ -81,7 +86,7 @@ absl::Status WriteValue(const FieldData& value, FieldType field_type,
return absl::UnimplementedError( return absl::UnimplementedError(
absl::StrCat("Cannot write type: ", field_type)); absl::StrCat("Cannot write type: ", field_type));
} }
return mediapipe::OkStatus(); return absl::OkStatus();
} }
// Serializes a packet value. // Serializes a packet value.
@ -167,6 +172,7 @@ absl::Status ReadValue(absl::string_view field_bytes, FieldType field_type,
// Deserializes a packet from a protobuf field. // Deserializes a packet from a protobuf field.
absl::Status ReadField(absl::string_view bytes, const FieldDescriptor* field, absl::Status ReadField(absl::string_view bytes, const FieldDescriptor* field,
FieldData* result) { FieldData* result) {
RET_CHECK_NE(field, nullptr);
FieldType field_type = AsFieldType(field->type()); FieldType field_type = AsFieldType(field->type());
std::string message_type = (field_type == WireFormatLite::TYPE_MESSAGE) std::string message_type = (field_type == WireFormatLite::TYPE_MESSAGE)
? field->message_type()->full_name() ? field->message_type()->full_name()
@ -174,47 +180,137 @@ absl::Status ReadField(absl::string_view bytes, const FieldDescriptor* field,
return ReadValue(bytes, field_type, message_type, result); return ReadValue(bytes, field_type, message_type, result);
} }
// Converts a chain of fields and indexes into field-numbers and indexes. // Reads all values from a repeated field.
ProtoUtilLite::ProtoPath AsProtoPath(const FieldPath& field_path) { absl::Status GetFieldValues(const FieldData& message_data,
ProtoUtilLite::ProtoPath result; const FieldDescriptor& field,
for (auto field : field_path) { std::vector<FieldData>* result) {
result.push_back({field.first->number(), field.second}); const std::string& message_bytes = message_data.message_value().value();
FieldType field_type = AsFieldType(field.type());
ProtoUtilLite proto_util;
ProtoUtilLite::ProtoPath proto_path = {{field.number(), 0}};
int count;
MP_RETURN_IF_ERROR(
proto_util.GetFieldCount(message_bytes, proto_path, field_type, &count));
std::vector<std::string> field_values;
MP_RETURN_IF_ERROR(proto_util.GetFieldRange(message_bytes, proto_path, count,
field_type, &field_values));
for (int i = 0; i < count; ++i) {
FieldData r;
MP_RETURN_IF_ERROR(ReadField(field_values[i], &field, &r));
result->push_back(std::move(r));
} }
return absl::OkStatus();
}
// Reads one value from a field.
absl::Status GetFieldValue(const FieldData& message_data,
const FieldPathEntry& entry, FieldData* result) {
RET_CHECK_NE(entry.field, nullptr);
const std::string& message_bytes = message_data.message_value().value();
FieldType field_type = AsFieldType(entry.field->type());
ProtoUtilLite proto_util;
ProtoUtilLite::ProtoPath proto_path = {{entry.field->number(), entry.index}};
std::vector<std::string> field_values;
MP_RETURN_IF_ERROR(proto_util.GetFieldRange(message_bytes, proto_path, 1,
field_type, &field_values));
MP_RETURN_IF_ERROR(ReadField(field_values[0], entry.field, result));
return absl::OkStatus();
}
// Writes one value to a field.
absl::Status SetFieldValue(const FieldPathEntry& entry, const FieldData& value,
FieldData* result) {
std::vector<FieldData> field_values;
ProtoUtilLite proto_util;
FieldType field_type = AsFieldType(entry.field->type());
ProtoUtilLite::ProtoPath proto_path = {{entry.field->number(), entry.index}};
std::string* message_bytes = result->mutable_message_value()->mutable_value();
int field_count;
MP_RETURN_IF_ERROR(proto_util.GetFieldCount(*message_bytes, proto_path,
field_type, &field_count));
if (entry.index > field_count) {
return absl::OutOfRangeError(
absl::StrCat("Option field index out of range: ", entry.index));
}
int replace_length = entry.index < field_count ? 1 : 0;
std::string field_value;
MP_RETURN_IF_ERROR(WriteField(value, entry.field, &field_value));
MP_RETURN_IF_ERROR(proto_util.ReplaceFieldRange(
message_bytes, proto_path, replace_length, field_type, {field_value}));
return absl::OkStatus();
}
// Returns true for a field of type "google.protobuf.Any".
bool IsProtobufAny(const FieldDescriptor* field) {
return AsFieldType(field->type()) == FieldType::TYPE_MESSAGE &&
field->message_type()->full_name() == kGoogleProtobufAny;
}
// Returns the message FieldData from a serialized protobuf.Any.
FieldData ParseProtobufAny(const FieldData& data) {
protobuf::Any any;
any.ParseFromString(data.message_value().value());
FieldData result;
result.mutable_message_value()->set_value(std::string(any.value()));
result.mutable_message_value()->set_type_url(any.type_url());
return result; return result;
} }
// Returns the options protobuf for a subgraph. // Returns the serialized protobuf.Any containing a message FieldData.
// TODO: Ensure that this works with multiple options protobufs. FieldData SerializeProtobufAny(const FieldData& data) {
absl::Status GetOptionsMessage( protobuf::Any any;
const proto_ns::RepeatedPtrField<mediapipe::protobuf::Any>& options_any, any.set_value(data.message_value().value());
const proto_ns::MessageLite& options_ext, FieldData* result) { any.set_type_url(data.message_value().type_url());
// Read the "graph_options" or "node_options" field. FieldData result;
for (const auto& options : options_any) { result.mutable_message_value()->set_value(any.SerializeAsString());
if (options.type_url().empty()) { result.mutable_message_value()->set_type_url(TypeUrl(kGoogleProtobufAny));
continue; return result;
} }
result->mutable_message_value()->set_type_url(options.type_url());
result->mutable_message_value()->set_value(std::string(options.value()));
return mediapipe::OkStatus();
}
// Read the "options" field. // Returns the field index of an extension type in a repeated field.
FieldData message_data; StatusOr<int> FindExtensionIndex(const FieldData& message_data,
*message_data.mutable_message_value()->mutable_value() = FieldPathEntry* entry) {
options_ext.SerializeAsString(); if (entry->field == nullptr || !IsProtobufAny(entry->field)) {
message_data.mutable_message_value()->set_type_url(options_ext.GetTypeName()); return -1;
std::vector<const FieldDescriptor*> ext_fields; }
OptionsRegistry::FindAllExtensions(options_ext.GetTypeName(), &ext_fields); std::string& extension_type = entry->extension_type;
for (auto ext_field : ext_fields) { std::vector<FieldData> field_values;
absl::Status status = GetField({{ext_field, 0}}, message_data, result); RET_CHECK_NE(entry->field, nullptr);
if (!status.ok()) { MP_RETURN_IF_ERROR(
return status; GetFieldValues(message_data, *entry->field, &field_values));
} for (int i = 0; i < field_values.size(); ++i) {
if (result->has_message_value()) { FieldData extension = ParseProtobufAny(field_values[i]);
return status; if (extension_type == "*" ||
ParseTypeUrl(extension.message_value().type_url()) == extension_type) {
return i;
} }
} }
return mediapipe::OkStatus(); return -1;
}
// Returns true if the value of a field is available.
bool HasField(const FieldPath& field_path, const FieldData& message_data) {
FieldData value;
return GetField(field_path, message_data, &value).ok() &&
value.value_case() != mediapipe::FieldData::VALUE_NOT_SET;
}
// Returns the extension field containing the specified extension-type.
const FieldDescriptor* FindExtensionField(const FieldData& message_data,
absl::string_view extension_type) {
std::string message_type =
ParseTypeUrl(message_data.message_value().type_url());
std::vector<const FieldDescriptor*> extensions;
OptionsRegistry::FindAllExtensions(message_type, &extensions);
for (const FieldDescriptor* extension : extensions) {
if (extension->message_type()->full_name() == extension_type) {
return extension;
}
if (extension_type == "*" && HasField({{extension, 0}}, message_data)) {
return extension;
}
}
return nullptr;
} }
// Sets a protobuf in a repeated protobuf::Any field. // Sets a protobuf in a repeated protobuf::Any field.
@ -234,6 +330,20 @@ void SetOptionsMessage(
*options_any->mutable_value() = node_options.message_value().value(); *options_any->mutable_value() = node_options.message_value().value();
} }
// Returns the count of values in a repeated field.
int FieldCount(const FieldData& message_data, const FieldDescriptor* field) {
const std::string& message_bytes = message_data.message_value().value();
FieldType field_type = AsFieldType(field->type());
ProtoUtilLite proto_util;
ProtoUtilLite::ProtoPath proto_path = {{field->number(), 0}};
int count;
if (proto_util.GetFieldCount(message_bytes, proto_path, field_type, &count)
.ok()) {
return count;
}
return 0;
}
} // anonymous namespace } // anonymous namespace
// Deserializes a packet containing a MessageLite value. // Deserializes a packet containing a MessageLite value.
@ -247,8 +357,8 @@ absl::Status ReadMessage(const std::string& value, const std::string& type_name,
} }
// Merge two options FieldData values. // Merge two options FieldData values.
absl::Status MergeOptionsMessages(const FieldData& base, const FieldData& over, absl::Status MergeMessages(const FieldData& base, const FieldData& over,
FieldData* result) { FieldData* result) {
absl::Status status; absl::Status status;
if (over.value_case() == FieldData::VALUE_NOT_SET) { if (over.value_case() == FieldData::VALUE_NOT_SET) {
*result = base; *result = base;
@ -278,28 +388,148 @@ absl::Status MergeOptionsMessages(const FieldData& base, const FieldData& over,
return status; return status;
} }
// Returns either the extension field or the repeated protobuf.Any field index
// holding the specified extension-type.
absl::Status FindExtension(const FieldData& message_data,
FieldPathEntry* entry) {
if (entry->extension_type.empty()) {
return absl::OkStatus();
}
// For repeated protobuf::Any, find the index for the extension_type.
ASSIGN_OR_RETURN(int index, FindExtensionIndex(message_data, entry));
if (index != -1) {
entry->index = index;
return absl::OkStatus();
}
// Returns the extension field containing the specified extension-type.
std::string& extension_type = entry->extension_type;
const FieldDescriptor* field =
FindExtensionField(message_data, extension_type);
if (field != nullptr) {
entry->field = field;
entry->index = 0;
return absl::OkStatus();
}
return absl::NotFoundError(
absl::StrCat("Option extension not found: ", extension_type));
}
// Return the FieldPath referencing an extension message.
FieldPath GetExtensionPath(const std::string& parent_type,
const std::string& extension_type,
const std::string& field_name,
bool is_protobuf_any) {
FieldPath result;
const tool::Descriptor* parent_descriptor =
tool::OptionsRegistry::GetProtobufDescriptor(parent_type);
FieldPathEntry field_entry;
field_entry.field = parent_descriptor->FindFieldByName(field_name);
if (is_protobuf_any) {
field_entry.extension_type = extension_type;
result = {std::move(field_entry)};
} else {
field_entry.index = 0;
FieldPathEntry extension_entry;
extension_entry.extension_type = extension_type;
result = {std::move(field_entry), std::move(extension_entry)};
}
return result;
}
// Returns the requested options protobuf for a graph node.
absl::Status GetNodeOptions(const FieldData& message_data,
const std::string& extension_type,
FieldData* result) {
constexpr char kOptionsName[] = "options";
constexpr char kNodeOptionsName[] = "node_options";
std::string parent_type = options_field_util::ParseTypeUrl(
std::string(message_data.message_value().type_url()));
FieldPath path;
Status status;
path = GetExtensionPath(parent_type, extension_type, kOptionsName, false);
status = GetField(path, message_data, result);
if (status.ok()) {
return status;
}
path = GetExtensionPath(parent_type, extension_type, kNodeOptionsName, true);
status = GetField(path, message_data, result);
return status;
}
// Returns the requested options protobuf for a graph.
absl::Status GetGraphOptions(const FieldData& message_data,
const std::string& extension_type,
FieldData* result) {
constexpr char kOptionsName[] = "options";
constexpr char kGraphOptionsName[] = "graph_options";
std::string parent_type = options_field_util::ParseTypeUrl(
std::string(message_data.message_value().type_url()));
FieldPath path;
Status status;
path = GetExtensionPath(parent_type, extension_type, kOptionsName, false);
status = GetField(path, message_data, result);
if (status.ok()) {
return status;
}
path = GetExtensionPath(parent_type, extension_type, kGraphOptionsName, true);
status = GetField(path, message_data, result);
return status;
}
// Reads a FieldData value from a protobuf field.
absl::Status GetField(const FieldPath& field_path,
const FieldData& message_data, FieldData* result) {
if (field_path.empty()) {
*result->mutable_message_value() = message_data.message_value();
return absl::OkStatus();
}
FieldPathEntry head = field_path.front();
FieldPath tail = field_path;
tail.erase(tail.begin());
if (!head.extension_type.empty()) {
MP_RETURN_IF_ERROR(FindExtension(message_data, &head));
}
if (tail.empty() && FieldCount(message_data, head.field) == 0) {
return absl::OkStatus();
}
MP_RETURN_IF_ERROR(GetFieldValue(message_data, head, result));
if (IsProtobufAny(head.field)) {
*result = ParseProtobufAny(*result);
}
if (!tail.empty()) {
FieldData child = *result;
MP_RETURN_IF_ERROR(GetField(tail, child, result));
}
return absl::OkStatus();
}
// Writes a FieldData value into protobuf field. // Writes a FieldData value into protobuf field.
absl::Status SetField(const FieldPath& field_path, const FieldData& value, absl::Status SetField(const FieldPath& field_path, const FieldData& value,
FieldData* message_data) { FieldData* message_data) {
if (field_path.empty()) { if (field_path.empty()) {
*message_data->mutable_message_value() = value.message_value(); *message_data->mutable_message_value() = value.message_value();
return mediapipe::OkStatus(); return absl::OkStatus();
} }
ProtoUtilLite proto_util; FieldPathEntry head = field_path.front();
const FieldDescriptor* field = field_path.back().first; FieldPath tail = field_path;
FieldType field_type = AsFieldType(field->type()); tail.erase(tail.begin());
std::string field_value; if (!head.extension_type.empty()) {
MP_RETURN_IF_ERROR(WriteField(value, field, &field_value)); MP_RETURN_IF_ERROR(FindExtension(*message_data, &head));
ProtoUtilLite::ProtoPath proto_path = AsProtoPath(field_path); }
std::string* message_bytes = if (tail.empty()) {
message_data->mutable_message_value()->mutable_value(); MP_RETURN_IF_ERROR(SetFieldValue(head, value, message_data));
int field_count; } else {
MP_RETURN_IF_ERROR(proto_util.GetFieldCount(*message_bytes, proto_path, FieldData child;
field_type, &field_count)); MP_RETURN_IF_ERROR(GetFieldValue(*message_data, head, &child));
MP_RETURN_IF_ERROR( MP_RETURN_IF_ERROR(SetField(tail, value, &child));
proto_util.ReplaceFieldRange(message_bytes, AsProtoPath(field_path), if (IsProtobufAny(head.field)) {
field_count, field_type, {field_value})); child = SerializeProtobufAny(child);
return mediapipe::OkStatus(); }
MP_RETURN_IF_ERROR(SetFieldValue(head, child, message_data));
}
return absl::OkStatus();
} }
// Merges a packet value into nested protobuf Message. // Merges a packet value into nested protobuf Message.
@ -308,7 +538,7 @@ absl::Status MergeField(const FieldPath& field_path, const FieldData& value,
absl::Status status; absl::Status status;
FieldType field_type = field_path.empty() FieldType field_type = field_path.empty()
? FieldType::TYPE_MESSAGE ? FieldType::TYPE_MESSAGE
: AsFieldType(field_path.back().first->type()); : AsFieldType(field_path.back().field->type());
std::string message_type = std::string message_type =
(value.has_message_value()) (value.has_message_value())
? ParseTypeUrl(std::string(value.message_value().type_url())) ? ParseTypeUrl(std::string(value.message_value().type_url()))
@ -317,49 +547,12 @@ absl::Status MergeField(const FieldPath& field_path, const FieldData& value,
if (field_type == FieldType::TYPE_MESSAGE) { if (field_type == FieldType::TYPE_MESSAGE) {
FieldData b; FieldData b;
status.Update(GetField(field_path, *message_data, &b)); status.Update(GetField(field_path, *message_data, &b));
status.Update(MergeOptionsMessages(b, v, &v)); status.Update(MergeMessages(b, v, &v));
} }
status.Update(SetField(field_path, v, message_data)); status.Update(SetField(field_path, v, message_data));
return status; return status;
} }
// Reads a packet value from a protobuf field.
absl::Status GetField(const FieldPath& field_path,
const FieldData& message_data, FieldData* result) {
if (field_path.empty()) {
*result->mutable_message_value() = message_data.message_value();
return mediapipe::OkStatus();
}
ProtoUtilLite proto_util;
const FieldDescriptor* field = field_path.back().first;
FieldType field_type = AsFieldType(field->type());
std::vector<std::string> field_values;
ProtoUtilLite::ProtoPath proto_path = AsProtoPath(field_path);
const std::string& message_bytes = message_data.message_value().value();
int field_count;
MP_RETURN_IF_ERROR(proto_util.GetFieldCount(message_bytes, proto_path,
field_type, &field_count));
if (field_count == 0) {
return mediapipe::OkStatus();
}
MP_RETURN_IF_ERROR(proto_util.GetFieldRange(message_bytes, proto_path, 1,
field_type, &field_values));
MP_RETURN_IF_ERROR(ReadField(field_values.front(), field, result));
return mediapipe::OkStatus();
}
// Returns the options protobuf for a graph.
absl::Status GetOptionsMessage(const CalculatorGraphConfig& config,
FieldData* result) {
return GetOptionsMessage(config.graph_options(), config.options(), result);
}
// Returns the options protobuf for a node.
absl::Status GetOptionsMessage(const CalculatorGraphConfig::Node& node,
FieldData* result) {
return GetOptionsMessage(node.node_options(), node.options(), result);
}
// Sets the node_options field in a Node, and clears the options field. // Sets the node_options field in a Node, and clears the options field.
void SetOptionsMessage(const FieldData& node_options, void SetOptionsMessage(const FieldData& node_options,
CalculatorGraphConfig::Node* node) { CalculatorGraphConfig::Node* node) {
@ -367,6 +560,16 @@ void SetOptionsMessage(const FieldData& node_options,
node->clear_options(); node->clear_options();
} }
// Serialize a MessageLite to a FieldData.
FieldData AsFieldData(const proto_ns::MessageLite& message) {
FieldData result;
*result.mutable_message_value()->mutable_value() =
message.SerializePartialAsString();
*result.mutable_message_value()->mutable_type_url() =
TypeUrl(message.GetTypeName());
return result;
}
// Represents a protobuf enum value stored in a Packet. // Represents a protobuf enum value stored in a Packet.
struct ProtoEnum { struct ProtoEnum {
ProtoEnum(int32 v) : value(v) {} ProtoEnum(int32 v) : value(v) {}
@ -415,7 +618,7 @@ absl::Status AsPacket(const FieldData& data, Packet* result) {
case FieldData::VALUE_NOT_SET: case FieldData::VALUE_NOT_SET:
*result = Packet(); *result = Packet();
} }
return mediapipe::OkStatus(); return absl::OkStatus();
} }
absl::Status AsFieldData(Packet packet, FieldData* result) { absl::Status AsFieldData(Packet packet, FieldData* result) {
@ -436,7 +639,7 @@ absl::Status AsFieldData(Packet packet, FieldData* result) {
packet.GetProtoMessageLite().SerializeAsString()); packet.GetProtoMessageLite().SerializeAsString());
result->mutable_message_value()->set_type_url( result->mutable_message_value()->set_type_url(
TypeUrl(packet.GetProtoMessageLite().GetTypeName())); TypeUrl(packet.GetProtoMessageLite().GetTypeName()));
return mediapipe::OkStatus(); return absl::OkStatus();
} }
if (kTypeIds->count(packet.GetTypeId()) == 0) { if (kTypeIds->count(packet.GetTypeId()) == 0) {
@ -473,7 +676,7 @@ absl::Status AsFieldData(Packet packet, FieldData* result) {
result->set_string_value(packet.Get<std::string>()); result->set_string_value(packet.Get<std::string>());
break; break;
} }
return mediapipe::OkStatus(); return absl::OkStatus();
} }
std::string TypeUrl(absl::string_view type_name) { std::string TypeUrl(absl::string_view type_name) {

View File

@ -19,8 +19,15 @@ namespace tool {
// Utility to read and write Packet data from protobuf fields. // Utility to read and write Packet data from protobuf fields.
namespace options_field_util { namespace options_field_util {
// A chain of nested fields and indexes. // A protobuf field and index description.
using FieldPath = std::vector<std::pair<const FieldDescriptor*, int>>; struct FieldPathEntry {
const FieldDescriptor* field = nullptr;
int index = -1;
std::string extension_type;
};
// A chain of nested protobuf fields and indexes.
using FieldPath = std::vector<FieldPathEntry>;
// Writes a field value into protobuf field. // Writes a field value into protobuf field.
absl::Status SetField(const FieldPath& field_path, const FieldData& value, absl::Status SetField(const FieldPath& field_path, const FieldData& value,
@ -39,21 +46,26 @@ absl::Status ReadMessage(const std::string& value, const std::string& type_name,
Packet* result); Packet* result);
// Merge two options protobuf field values. // Merge two options protobuf field values.
absl::Status MergeOptionsMessages(const FieldData& base, const FieldData& over, absl::Status MergeMessages(const FieldData& base, const FieldData& over,
FieldData* result); FieldData* result);
// Returns the options protobuf for a graph. // Returns the requested options protobuf for a graph.
absl::Status GetOptionsMessage(const CalculatorGraphConfig& config, absl::Status GetNodeOptions(const FieldData& message_data,
FieldData* result); const std::string& extension_type,
FieldData* result);
// Returns the options protobuf for a node. // Returns the requested options protobuf for a graph node.
absl::Status GetOptionsMessage(const CalculatorGraphConfig::Node& node, absl::Status GetGraphOptions(const FieldData& message_data,
FieldData* result); const std::string& extension_type,
FieldData* result);
// Sets the node_options field in a Node, and clears the options field. // Sets the node_options field in a Node, and clears the options field.
void SetOptionsMessage(const FieldData& node_options, void SetOptionsMessage(const FieldData& node_options,
CalculatorGraphConfig::Node* node); CalculatorGraphConfig::Node* node);
// Serialize a MessageLite to a FieldData.
FieldData AsFieldData(const proto_ns::MessageLite& message);
// Constructs a Packet for a FieldData proto. // Constructs a Packet for a FieldData proto.
absl::Status AsPacket(const FieldData& data, Packet* result); absl::Status AsPacket(const FieldData& data, Packet* result);

View File

@ -5,17 +5,42 @@
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/packet.h" #include "mediapipe/framework/packet.h"
#include "mediapipe/framework/packet_type.h" #include "mediapipe/framework/packet_type.h"
#include "mediapipe/framework/port/advanced_proto_inc.h" #include "mediapipe/framework/port/advanced_proto_inc.h"
#include "mediapipe/framework/port/any_proto.h" #include "mediapipe/framework/port/any_proto.h"
#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/tool/name_util.h" #include "mediapipe/framework/tool/name_util.h"
#include "mediapipe/framework/tool/options_registry.h"
namespace mediapipe { namespace mediapipe {
namespace tool { namespace tool {
namespace {
// StrSplit Delimiter to split strings at single colon tokens, ignoring
// double-colon tokens.
class SingleColonDelimiter {
public:
SingleColonDelimiter() {}
absl::string_view Find(absl::string_view text, size_t pos) const {
while (pos < text.length()) {
size_t p = text.find(':', pos);
p = (p == absl::string_view::npos) ? text.length() : p;
if (p >= text.length() - 1 || text[p + 1] != ':') {
return text.substr(p, 1);
}
pos = p + 2;
}
return text.substr(text.length(), 0);
}
};
} // namespace
// Helper functions for parsing the graph options syntax. // Helper functions for parsing the graph options syntax.
class OptionsSyntaxUtil::OptionsSyntaxHelper { class OptionsSyntaxUtil::OptionsSyntaxHelper {
public: public:
@ -31,13 +56,32 @@ class OptionsSyntaxUtil::OptionsSyntaxHelper {
// Returns the option protobuf field name for a tag or packet name. // Returns the option protobuf field name for a tag or packet name.
absl::string_view OptionFieldName(absl::string_view name) { return name; } absl::string_view OptionFieldName(absl::string_view name) { return name; }
// Return the extension-type specified for an option field.
absl::string_view ExtensionType(absl::string_view option_name) {
constexpr absl::string_view kExt = "Ext::";
if (absl::StartsWithIgnoreCase(option_name, kExt)) {
return option_name.substr(kExt.size());
}
return "";
}
// Returns the field names encoded in an options tag.
std::vector<absl::string_view> OptionTagNames(absl::string_view tag) {
if (absl::StartsWith(tag, syntax_.tag_name)) {
tag = tag.substr(syntax_.tag_name.length());
} else if (absl::StartsWith(tag, syntax_.packet_name)) {
tag = tag.substr(syntax_.packet_name.length());
}
if (absl::StartsWith(tag, syntax_.separator)) {
tag = tag.substr(syntax_.separator.length());
}
return absl::StrSplit(tag, syntax_.separator);
}
// Returns the field-path for an option stream-tag. // Returns the field-path for an option stream-tag.
FieldPath OptionFieldPath(const std::string& tag, FieldPath OptionFieldPath(absl::string_view tag,
const Descriptor* descriptor) { const Descriptor* descriptor) {
int prefix = syntax_.tag_name.length() + syntax_.separator.length(); std::vector<absl::string_view> name_tags = OptionTagNames(tag);
std::string suffix = tag.substr(prefix);
std::vector<absl::string_view> name_tags =
absl::StrSplit(suffix, syntax_.separator);
FieldPath result; FieldPath result;
for (absl::string_view name_tag : name_tags) { for (absl::string_view name_tag : name_tags) {
if (name_tag.empty()) { if (name_tag.empty()) {
@ -46,8 +90,16 @@ class OptionsSyntaxUtil::OptionsSyntaxHelper {
absl::string_view option_name = OptionFieldName(name_tag); absl::string_view option_name = OptionFieldName(name_tag);
int index; int index;
if (absl::SimpleAtoi(option_name, &index)) { if (absl::SimpleAtoi(option_name, &index)) {
result.back().second = index; result.back().index = index;
}
if (!ExtensionType(option_name).empty()) {
std::string extension_type = std::string(ExtensionType(option_name));
result.push_back({nullptr, 0, extension_type});
descriptor = OptionsRegistry::GetProtobufDescriptor(extension_type);
} else { } else {
if (descriptor == nullptr) {
break;
}
auto field = descriptor->FindFieldByName(std::string(option_name)); auto field = descriptor->FindFieldByName(std::string(option_name));
descriptor = field ? field->message_type() : nullptr; descriptor = field ? field->message_type() : nullptr;
result.push_back({std::move(field), 0}); result.push_back({std::move(field), 0});
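A hedged fragment mirroring the unit tests later in this change: it shows how a tag produced by OptionFieldsTag is resolved into a FieldPath by the helpers above, with the descriptor looked up through OptionsRegistry as in the tests. This is not a complete program; it assumes the enclosing mediapipe namespace.

```
// Hedged fragment (not a complete program); mirrors the tests below.
tool::OptionsSyntaxUtil syntax_util;
const tool::Descriptor* descriptor =
    tool::OptionsRegistry::GetProtobufDescriptor(
        "mediapipe.NightLightCalculatorOptions");
tool::options_field_util::FieldPath field_path =
    syntax_util.OptionFieldPath("OPTIONS/sub_options/num_lights", descriptor);
// field_path[0].field->name() == "sub_options"
// field_path[1].field->name() == "num_lights", each with index 0 and an empty
// extension_type, since the tag names no "Ext::" extension.
```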
@ -78,7 +130,7 @@ class OptionsSyntaxUtil::OptionsSyntaxHelper {
} }
// Converts slash-separated field names into a tag name. // Converts slash-separated field names into a tag name.
std::string OptionFieldsTag(const std::string& option_names) { std::string OptionFieldsTag(absl::string_view option_names) {
std::string tag_prefix = syntax_.tag_name + syntax_.separator; std::string tag_prefix = syntax_.tag_name + syntax_.separator;
std::vector<absl::string_view> names = absl::StrSplit(option_names, '/'); std::vector<absl::string_view> names = absl::StrSplit(option_names, '/');
if (!names.empty() && names[0] == syntax_.tag_name) { if (!names.empty() && names[0] == syntax_.tag_name) {
@ -129,15 +181,18 @@ OptionsSyntaxUtil::OptionsSyntaxUtil(const std::string& tag_name,
OptionsSyntaxUtil::~OptionsSyntaxUtil() {} OptionsSyntaxUtil::~OptionsSyntaxUtil() {}
std::string OptionsSyntaxUtil::OptionFieldsTag( std::string OptionsSyntaxUtil::OptionFieldsTag(absl::string_view option_names) {
const std::string& option_names) {
return syntax_helper_->OptionFieldsTag(option_names); return syntax_helper_->OptionFieldsTag(option_names);
} }
OptionsSyntaxUtil::FieldPath OptionsSyntaxUtil::OptionFieldPath( OptionsSyntaxUtil::FieldPath OptionsSyntaxUtil::OptionFieldPath(
const std::string& tag, const Descriptor* descriptor) { absl::string_view tag, const Descriptor* descriptor) {
return syntax_helper_->OptionFieldPath(tag, descriptor); return syntax_helper_->OptionFieldPath(tag, descriptor);
} }
std::vector<absl::string_view> OptionsSyntaxUtil::StrSplitTags(
absl::string_view tag_and_name) {
return absl::StrSplit(tag_and_name, SingleColonDelimiter());
}
} // namespace tool } // namespace tool
} // namespace mediapipe } // namespace mediapipe
@ -28,12 +28,15 @@ class OptionsSyntaxUtil {
~OptionsSyntaxUtil(); ~OptionsSyntaxUtil();
// Converts slash-separated field names into a tag name. // Converts slash-separated field names into a tag name.
std::string OptionFieldsTag(const std::string& option_names); std::string OptionFieldsTag(absl::string_view option_names);
// Returns the field-path for an option stream-tag. // Returns the field-path for an option stream-tag.
FieldPath OptionFieldPath(const std::string& tag, FieldPath OptionFieldPath(absl::string_view tag,
const Descriptor* descriptor); const Descriptor* descriptor);
// Splits a std::string into "tag" and "name" delimited by a single colon.
std::vector<absl::string_view> StrSplitTags(absl::string_view tag_and_name);
private: private:
class OptionsSyntaxHelper; class OptionsSyntaxHelper;
std::unique_ptr<OptionsSyntaxHelper> syntax_helper_; std::unique_ptr<OptionsSyntaxHelper> syntax_helper_;
@ -7,6 +7,7 @@
#include "absl/strings/ascii.h" #include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_context.h"
#include "mediapipe/framework/input_stream_shard.h" #include "mediapipe/framework/input_stream_shard.h"
#include "mediapipe/framework/output_side_packet.h" #include "mediapipe/framework/output_side_packet.h"
@ -24,50 +25,77 @@
namespace mediapipe { namespace mediapipe {
namespace tool { namespace tool {
using options_field_util::FieldPath;
using options_field_util::GetField;
using options_field_util::GetGraphOptions;
using options_field_util::GetNodeOptions;
using options_field_util::MergeField;
using options_field_util::MergeMessages;
// Returns the type for the root options message if specified.
std::string ExtensionType(const std::string& option_fields_tag) {
OptionsSyntaxUtil syntax_util;
options_field_util::FieldPath field_path =
syntax_util.OptionFieldPath(option_fields_tag, nullptr);
std::string result = !field_path.empty() ? field_path[0].extension_type : "";
return !result.empty() ? result : "*";
}
// Constructs a FieldPath for field names starting at a message type.
FieldPath GetPath(const std::string& path_tag,
const std::string& message_type) {
OptionsSyntaxUtil syntax_util;
const Descriptor* descriptor =
OptionsRegistry::GetProtobufDescriptor(message_type);
return syntax_util.OptionFieldPath(path_tag, descriptor);
}
// Returns the message type for a FieldData.
std::string MessageType(FieldData message) {
return options_field_util::ParseTypeUrl(
std::string(message.message_value().type_url()));
}
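ParseTypeUrl itself is not shown in this diff; the sketch below only illustrates the proto Any type_url convention it is assumed to rely on. StripTypeUrlPrefix is a hypothetical standalone helper, not MediaPipe's implementation.

```
#include <iostream>
#include <string>

// Hypothetical helper: proto Any type URLs have the form
// "type.googleapis.com/<full.message.Name>"; the message type is the part
// after the last '/'.
std::string StripTypeUrlPrefix(const std::string& type_url) {
  size_t pos = type_url.rfind('/');
  return pos == std::string::npos ? type_url : type_url.substr(pos + 1);
}

int main() {
  std::cout << StripTypeUrlPrefix(
                   "type.googleapis.com/mediapipe.NightLightCalculatorOptions")
            << "\n";  // prints "mediapipe.NightLightCalculatorOptions"
}
```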
// Copy literal options from graph_options to node_options. // Copy literal options from graph_options to node_options.
absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node, absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
CalculatorGraphConfig* config) { CalculatorGraphConfig* config) {
Status status; Status status;
FieldData config_options, parent_node_options, graph_options; FieldData graph_data = options_field_util::AsFieldData(*config);
status.Update( FieldData parent_data = options_field_util::AsFieldData(parent_node);
options_field_util::GetOptionsMessage(*config, &config_options));
status.Update(
options_field_util::GetOptionsMessage(parent_node, &parent_node_options));
status.Update(options_field_util::MergeOptionsMessages(
config_options, parent_node_options, &graph_options));
const Descriptor* options_descriptor =
OptionsRegistry::GetProtobufDescriptor(options_field_util::ParseTypeUrl(
std::string(graph_options.message_value().type_url())));
if (!options_descriptor) {
return status;
}
OptionsSyntaxUtil syntax_util; OptionsSyntaxUtil syntax_util;
for (auto& node : *config->mutable_node()) { for (auto& node : *config->mutable_node()) {
FieldData node_data; FieldData node_data = options_field_util::AsFieldData(node);
status.Update(options_field_util::GetOptionsMessage(node, &node_data));
if (!node_data.has_message_value() || node.option_value_size() == 0) {
continue;
}
const Descriptor* node_options_descriptor =
OptionsRegistry::GetProtobufDescriptor(options_field_util::ParseTypeUrl(
std::string(node_data.message_value().type_url())));
if (!node_options_descriptor) {
continue;
}
for (const std::string& option_def : node.option_value()) { for (const std::string& option_def : node.option_value()) {
std::vector<std::string> tag_and_name = absl::StrSplit(option_def, ':'); std::vector<absl::string_view> tag_and_name =
syntax_util.StrSplitTags(option_def);
std::string graph_tag = syntax_util.OptionFieldsTag(tag_and_name[1]); std::string graph_tag = syntax_util.OptionFieldsTag(tag_and_name[1]);
std::string graph_extension_type = ExtensionType(graph_tag);
std::string node_tag = syntax_util.OptionFieldsTag(tag_and_name[0]); std::string node_tag = syntax_util.OptionFieldsTag(tag_and_name[0]);
std::string node_extension_type = ExtensionType(node_tag);
FieldData graph_options;
GetGraphOptions(graph_data, graph_extension_type, &graph_options)
.IgnoreError();
FieldData parent_options;
GetNodeOptions(parent_data, graph_extension_type, &parent_options)
.IgnoreError();
status.Update(
MergeMessages(graph_options, parent_options, &graph_options));
FieldData node_options;
status.Update(
GetNodeOptions(node_data, node_extension_type, &node_options));
if (!node_options.has_message_value() ||
!graph_options.has_message_value()) {
continue;
}
FieldPath graph_path = GetPath(graph_tag, MessageType(graph_options));
FieldPath node_path = GetPath(node_tag, MessageType(node_options));
FieldData packet_data; FieldData packet_data;
status.Update(options_field_util::GetField( status.Update(GetField(graph_path, graph_options, &packet_data));
syntax_util.OptionFieldPath(graph_tag, options_descriptor), status.Update(MergeField(node_path, packet_data, &node_options));
graph_options, &packet_data)); options_field_util::SetOptionsMessage(node_options, &node);
status.Update(options_field_util::MergeField(
syntax_util.OptionFieldPath(node_tag, node_options_descriptor),
packet_data, &node_data));
} }
options_field_util::SetOptionsMessage(node_data, &node);
} }
return status; return status;
} }
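For reference, the option_value strings consumed above have the form "<node option field path>:<graph option field path>". A hedged fragment showing how a config node might declare one; the calculator name is hypothetical, and the field paths are the ones exercised by the StrSplitTags test below.

```
// Hedged fragment (illustrative names only).
CalculatorGraphConfig::Node node;
node.set_calculator("SomeSubgraphNode");  // hypothetical name
// "<node option field path>:<graph option field path>": CopyLiteralOptions
// reads the graph-side value and merges it into the node-side field.
node.add_option_value("chain_length:options/sub_options/num_lights");
```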
@ -15,6 +15,7 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "absl/strings/string_view.h"
#include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/deps/message_matchers.h" #include "mediapipe/framework/deps/message_matchers.h"
#include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/gtest.h"
@ -22,6 +23,7 @@
#include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/testdata/night_light_calculator.pb.h" #include "mediapipe/framework/testdata/night_light_calculator.pb.h"
#include "mediapipe/framework/tool/node_chain_subgraph.pb.h" #include "mediapipe/framework/tool/node_chain_subgraph.pb.h"
#include "mediapipe/framework/tool/options_field_util.h"
#include "mediapipe/framework/tool/options_registry.h" #include "mediapipe/framework/tool/options_registry.h"
#include "mediapipe/framework/tool/options_syntax_util.h" #include "mediapipe/framework/tool/options_syntax_util.h"
@ -51,6 +53,35 @@ class NightLightCalculator : public CalculatorBase {
}; };
REGISTER_CALCULATOR(NightLightCalculator); REGISTER_CALCULATOR(NightLightCalculator);
using tool::options_field_util::FieldPath;
// Validates FieldPathEntry contents.
bool Equals(const tool::options_field_util::FieldPathEntry& entry,
const std::string& field_name, int index,
const std::string& extension_type) {
const std::string& name = entry.field ? entry.field->name() : "";
return name == field_name && entry.index == index &&
entry.extension_type == extension_type;
}
// Serializes a MessageLite into FieldData.message_value.
FieldData AsFieldData(const proto_ns::MessageLite& message) {
FieldData result;
*result.mutable_message_value()->mutable_value() =
message.SerializeAsString();
result.mutable_message_value()->set_type_url(message.GetTypeName());
return result;
}
// Returns the type for the root options message if specified.
std::string ExtensionType(const std::string& option_fields_tag) {
tool::OptionsSyntaxUtil syntax_util;
tool::options_field_util::FieldPath field_path =
syntax_util.OptionFieldPath(option_fields_tag, nullptr);
std::string result = !field_path.empty() ? field_path[0].extension_type : "";
return !result.empty() ? result : "*";
}
// Tests for calculator and graph options. // Tests for calculator and graph options.
// //
class OptionsUtilTest : public ::testing::Test { class OptionsUtilTest : public ::testing::Test {
@ -150,8 +181,8 @@ TEST_F(OptionsUtilTest, OptionsSyntaxUtil) {
EXPECT_EQ(tag, "OPTIONS/sub_options/num_lights"); EXPECT_EQ(tag, "OPTIONS/sub_options/num_lights");
field_path = syntax_util.OptionFieldPath(tag, descriptor); field_path = syntax_util.OptionFieldPath(tag, descriptor);
EXPECT_EQ(field_path.size(), 2); EXPECT_EQ(field_path.size(), 2);
EXPECT_EQ(field_path[0].first->name(), "sub_options"); EXPECT_EQ(field_path[0].field->name(), "sub_options");
EXPECT_EQ(field_path[1].first->name(), "num_lights"); EXPECT_EQ(field_path[1].field->name(), "num_lights");
} }
{ {
// A tag syntax with a text-coded separator. // A tag syntax with a text-coded separator.
@ -160,10 +191,100 @@ TEST_F(OptionsUtilTest, OptionsSyntaxUtil) {
EXPECT_EQ(tag, "OPTIONS_Z0Z_sub_options_Z0Z_num_lights"); EXPECT_EQ(tag, "OPTIONS_Z0Z_sub_options_Z0Z_num_lights");
field_path = syntax_util.OptionFieldPath(tag, descriptor); field_path = syntax_util.OptionFieldPath(tag, descriptor);
EXPECT_EQ(field_path.size(), 2); EXPECT_EQ(field_path.size(), 2);
EXPECT_EQ(field_path[0].first->name(), "sub_options"); EXPECT_EQ(field_path[0].field->name(), "sub_options");
EXPECT_EQ(field_path[1].first->name(), "num_lights"); EXPECT_EQ(field_path[1].field->name(), "num_lights");
} }
} }
TEST_F(OptionsUtilTest, OptionFieldPath) {
tool::OptionsSyntaxUtil syntax_util;
std::vector<absl::string_view> split;
split = syntax_util.StrSplitTags("a/graph/option:a/node/option");
EXPECT_EQ(2, split.size());
EXPECT_EQ(split[0], "a/graph/option");
EXPECT_EQ(split[1], "a/node/option");
split = syntax_util.StrSplitTags("Ext::a/graph/option:Ext::a/node/option");
EXPECT_EQ(2, split.size());
EXPECT_EQ(split[0], "Ext::a/graph/option");
EXPECT_EQ(split[1], "Ext::a/node/option");
split =
syntax_util.StrSplitTags("chain_length:options/sub_options/num_lights");
EXPECT_EQ(2, split.size());
EXPECT_EQ(split[0], "chain_length");
EXPECT_EQ(split[1], "options/sub_options/num_lights");
const tool::Descriptor* descriptor =
tool::OptionsRegistry::GetProtobufDescriptor(
"mediapipe.NightLightCalculatorOptions");
tool::options_field_util::FieldPath field_path =
syntax_util.OptionFieldPath(split[1], descriptor);
EXPECT_EQ(field_path.size(), 2);
EXPECT_EQ(field_path[0].field->name(), "sub_options");
EXPECT_EQ(field_path[1].field->name(), "num_lights");
}
TEST_F(OptionsUtilTest, FindOptionsMessage) {
tool::OptionsSyntaxUtil syntax_util;
std::vector<absl::string_view> split;
split =
syntax_util.StrSplitTags("chain_length:options/sub_options/num_lights");
EXPECT_EQ(2, split.size());
EXPECT_EQ(split[0], "chain_length");
EXPECT_EQ(split[1], "options/sub_options/num_lights");
const tool::Descriptor* descriptor =
tool::OptionsRegistry::GetProtobufDescriptor(
"mediapipe.NightLightCalculatorOptions");
tool::options_field_util::FieldPath field_path =
syntax_util.OptionFieldPath(split[1], descriptor);
EXPECT_EQ(field_path.size(), 2);
EXPECT_TRUE(Equals(field_path[0], "sub_options", 0, ""));
EXPECT_TRUE(Equals(field_path[1], "num_lights", 0, ""));
{
// NightLightCalculatorOptions in Node.options.
CalculatorGraphConfig::Node node;
NightLightCalculatorOptions* options =
node.mutable_options()->MutableExtension(
NightLightCalculatorOptions::ext);
options->mutable_sub_options()->add_num_lights(33);
// Retrieve the specified option.
FieldData node_data = AsFieldData(node);
auto path = field_path;
std::string node_extension_type = ExtensionType(std::string(split[1]));
FieldData node_options;
MP_EXPECT_OK(tool::options_field_util::GetNodeOptions(
node_data, node_extension_type, &node_options));
FieldData packet_data;
MP_EXPECT_OK(tool::options_field_util::GetField(field_path, node_options,
&packet_data));
EXPECT_EQ(packet_data.value_case(), FieldData::kInt32Value);
EXPECT_EQ(packet_data.int32_value(), 33);
}
{
// NightLightCalculatorOptions in Node.node_options.
CalculatorGraphConfig::Node node;
NightLightCalculatorOptions options;
options.mutable_sub_options()->add_num_lights(33);
node.add_node_options()->PackFrom(options);
// Retrieve the specified option.
FieldData node_data = AsFieldData(node);
auto path = field_path;
std::string node_extension_type = ExtensionType(std::string(split[1]));
FieldData node_options;
MP_EXPECT_OK(tool::options_field_util::GetNodeOptions(
node_data, node_extension_type, &node_options));
FieldData packet_data;
MP_EXPECT_OK(tool::options_field_util::GetField(field_path, node_options,
&packet_data));
EXPECT_EQ(packet_data.value_case(), FieldData::kInt32Value);
EXPECT_EQ(packet_data.int32_value(), 33);
}
// TODO: Test with specified extension_type.
}
} // namespace } // namespace
} // namespace mediapipe } // namespace mediapipe
@ -207,16 +207,20 @@ cc_library(
cc_library( cc_library(
name = "gpu_buffer", name = "gpu_buffer",
srcs = ["gpu_buffer.cc"],
hdrs = ["gpu_buffer.h"], hdrs = ["gpu_buffer.h"],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
":gl_base", ":gl_base",
":gl_context",
":gpu_buffer_format", ":gpu_buffer_format",
"//mediapipe/framework/formats:image_frame",
] + select({ ] + select({
"//conditions:default": [ "//conditions:default": [
":gl_texture_buffer", ":gl_texture_buffer",
], ],
"//mediapipe:ios": [ "//mediapipe:ios": [
"//mediapipe/objc:util",
"//mediapipe/objc:CFHolder", "//mediapipe/objc:CFHolder",
], ],
"//mediapipe:macos": [ "//mediapipe:macos": [
@ -478,6 +482,7 @@ cc_library(
"//mediapipe:ios": [ "//mediapipe:ios": [
":pixel_buffer_pool_util", ":pixel_buffer_pool_util",
"//mediapipe/objc:CFHolder", "//mediapipe/objc:CFHolder",
"//mediapipe/objc:util",
], ],
"//mediapipe:macos": [ "//mediapipe:macos": [
":pixel_buffer_pool_util", ":pixel_buffer_pool_util",
@ -498,55 +503,40 @@ cc_library(
], ],
) )
HELPER_ANDROID_SRCS = [ cc_library(
"gl_calculator_helper_impl_android.cc", name = "egl_surface_holder",
"gl_calculator_helper_impl_common.cc", hdrs = ["egl_surface_holder.h"],
] deps = [
":gl_base",
HELPER_ANDROID_HDRS = [ "@com_google_absl//absl/synchronization",
"egl_surface_holder.h",
]
HELPER_COMMON_SRCS = [
"gl_calculator_helper.cc",
]
HELPER_COMMON_HDRS = [
"gl_calculator_helper.h",
"gl_calculator_helper_impl.h",
]
HELPER_IOS_SRCS = [
"gl_calculator_helper_impl_ios.mm",
"gl_calculator_helper_impl_common.cc",
]
HELPER_IOS_FRAMEWORKS = [
"AVFoundation",
"CoreVideo",
"CoreGraphics",
"CoreMedia",
"GLKit",
"QuartzCore",
] + select({
"//conditions:default": [
"OpenGLES",
], ],
"//mediapipe:macos": [ )
"OpenGL",
"AppKit",
],
})
cc_library( cc_library(
name = "gl_calculator_helper", name = "gl_calculator_helper",
srcs = select({ srcs = [
"//conditions:default": HELPER_COMMON_SRCS + HELPER_ANDROID_SRCS, "gl_calculator_helper.cc",
"//mediapipe:apple": [], "gl_calculator_helper_impl_common.cc",
}), ],
hdrs = HELPER_COMMON_HDRS + select({ hdrs = [
"//conditions:default": HELPER_ANDROID_HDRS, "gl_calculator_helper.h",
"//mediapipe:apple": [], "gl_calculator_helper_impl.h",
],
linkopts = select({
"//conditions:default": [],
"//mediapipe:apple": [
"-framework AVFoundation",
"-framework CoreVideo",
"-framework CoreGraphics",
"-framework CoreMedia",
"-framework GLKit",
"-framework QuartzCore",
],
}) + select({
"//conditions:default": [],
"//mediapipe:macos": [
"-framework AppKit",
],
}), }),
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
@ -582,34 +572,20 @@ cc_library(
] + select({ ] + select({
"//conditions:default": [ "//conditions:default": [
], ],
"//mediapipe:apple": [ "//mediapipe:apple": [],
":gl_calculator_helper_ios",
"//mediapipe/objc:util",
"//mediapipe/objc:CFHolder",
],
}), }),
) )
# TODO: remove
objc_library( objc_library(
name = "gl_calculator_helper_ios", name = "gl_calculator_helper_ios",
srcs = HELPER_COMMON_SRCS + HELPER_IOS_SRCS,
hdrs = HELPER_COMMON_HDRS,
copts = [ copts = [
"-Wno-shorten-64-to-32", "-Wno-shorten-64-to-32",
"-std=c++17", "-std=c++17",
], ],
sdk_frameworks = HELPER_IOS_FRAMEWORKS,
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
":gl_base", ":gl_calculator_helper",
":gl_context",
":gpu_buffer",
":gpu_buffer_multi_pool",
":gpu_service",
":gpu_shared_data_internal",
":shader_util",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image",
"//mediapipe/objc:mediapipe_framework_ios", "//mediapipe/objc:mediapipe_framework_ios",
"//mediapipe/objc:util", "//mediapipe/objc:util",
], ],
@ -769,6 +745,7 @@ cc_library(
srcs = ["gl_surface_sink_calculator.cc"], srcs = ["gl_surface_sink_calculator.cc"],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
":egl_surface_holder",
":gl_calculator_helper", ":gl_calculator_helper",
":gl_quad_renderer", ":gl_quad_renderer",
":gpu_buffer", ":gpu_buffer",
@ -24,15 +24,8 @@
#include "mediapipe/gpu/gpu_buffer.h" #include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gpu_service.h" #include "mediapipe/gpu/gpu_service.h"
#ifdef __APPLE__
#include "mediapipe/objc/util.h"
#endif
namespace mediapipe { namespace mediapipe {
GlTexture::GlTexture(GLuint name, int width, int height)
: name_(name), width_(width), height_(height), target_(GL_TEXTURE_2D) {}
// The constructor and destructor need to be defined here so that // The constructor and destructor need to be defined here so that
// std::unique_ptr can see the full definition of GlCalculatorHelperImpl. // std::unique_ptr can see the full definition of GlCalculatorHelperImpl.
// In the header, it is an incomplete type. // In the header, it is an incomplete type.
@ -31,8 +31,6 @@
#ifdef __APPLE__ #ifdef __APPLE__
#include <CoreVideo/CoreVideo.h> #include <CoreVideo/CoreVideo.h>
#include "mediapipe/objc/CFHolder.h"
#endif // __APPLE__ #endif // __APPLE__
namespace mediapipe { namespace mediapipe {
@ -42,14 +40,6 @@ class GlTexture;
class GpuResources; class GpuResources;
struct GpuSharedData; struct GpuSharedData;
#ifdef __APPLE__
#if TARGET_OS_OSX
typedef CVOpenGLTextureRef CVTextureType;
#else
typedef CVOpenGLESTextureRef CVTextureType;
#endif // TARGET_OS_OSX
#endif // __APPLE__
using ImageFrameSharedPtr = std::shared_ptr<ImageFrame>; using ImageFrameSharedPtr = std::shared_ptr<ImageFrame>;
// TODO: remove this and Process below, or make Process available // TODO: remove this and Process below, or make Process available
@ -174,14 +164,12 @@ class GlCalculatorHelper {
class GlTexture { class GlTexture {
public: public:
GlTexture() {} GlTexture() {}
GlTexture(GLuint name, int width, int height);
~GlTexture() { Release(); } ~GlTexture() { Release(); }
int width() const { return width_; } int width() const { return view_.width(); }
int height() const { return height_; } int height() const { return view_.height(); }
GLenum target() const { return target_; } GLenum target() const { return view_.target(); }
GLuint name() const { return name_; } GLuint name() const { return view_.name(); }
// Returns a buffer that can be sent to another calculator. // Returns a buffer that can be sent to another calculator.
// & manages sync token // & manages sync token
@ -190,26 +178,12 @@ class GlTexture {
std::unique_ptr<T> GetFrame() const; std::unique_ptr<T> GetFrame() const;
// Releases texture memory & manages sync token // Releases texture memory & manages sync token
void Release(); void Release() { view_.Release(); }
private: private:
explicit GlTexture(GlTextureView view) : view_(std::move(view)) {}
friend class GlCalculatorHelperImpl; friend class GlCalculatorHelperImpl;
GlCalculatorHelperImpl* helper_impl_ = nullptr; GlTextureView view_;
GLuint name_ = 0;
int width_ = 0;
int height_ = 0;
GLenum target_ = GL_TEXTURE_2D;
#ifdef MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
// For CVPixelBufferRef-based rendering
CFHolder<CVTextureType> cv_texture_;
#else
// Keeps track of whether this texture mapping is for read access, so that
// we can create a consumer sync point when releasing it.
bool for_reading_ = false;
#endif
GpuBuffer gpu_buffer_;
int plane_ = 0;
}; };
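The calculator-facing usage stays the same after this refactoring. Below is a hedged sketch of the typical pattern using the public GlCalculatorHelper API (shader dispatch elided; RenderOnGpu is an illustrative function name, not part of MediaPipe).

```
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/gpu/gl_calculator_helper.h"

// Hedged sketch: GlTexture is now backed by a GlTextureView, but callers keep
// using the same width()/name()/GetFrame()/Release() surface.
absl::Status RenderOnGpu(mediapipe::GlCalculatorHelper& helper,
                         const mediapipe::GpuBuffer& input,
                         mediapipe::Packet* output) {
  return helper.RunInGlContext([&]() -> absl::Status {
    mediapipe::GlTexture src = helper.CreateSourceTexture(input);
    mediapipe::GlTexture dst = helper.CreateDestinationTexture(
        src.width(), src.height(), mediapipe::GpuBufferFormat::kBGRA32);
    helper.BindFramebuffer(dst);
    // ... bind a shader program and draw `src` into `dst` here ...
    std::unique_ptr<mediapipe::GpuBuffer> frame =
        dst.GetFrame<mediapipe::GpuBuffer>();
    src.Release();
    dst.Release();
    *output = mediapipe::Adopt(frame.release());
    return absl::OkStatus();
  });
}
```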
// Returns the entry with the given tag if the collection uses tags, with the // Returns the entry with the given tag if the collection uses tags, with the
@ -58,19 +58,14 @@ class GlCalculatorHelperImpl {
GlContext& GetGlContext() const; GlContext& GetGlContext() const;
// For internal use. // For internal use.
void ReadTexture(const GlTexture& texture, void* output, size_t size); static void ReadTexture(const GlTextureView& view, void* output, size_t size);
private: private:
// Makes a GpuBuffer accessible as a texture in the GL context. // Makes a GpuBuffer accessible as a texture in the GL context.
GlTexture MapGpuBuffer(const GpuBuffer& gpu_buffer, int plane); GlTexture MapGpuBuffer(const GpuBuffer& gpu_buffer, int plane,
bool for_reading);
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER void AttachGlTexture(GlTexture& texture, const GpuBuffer& gpu_buffer,
GlTexture MapGlTextureBuffer(const GlTextureBufferSharedPtr& texture_buffer); int plane, bool for_reading);
GlTextureBufferSharedPtr MakeGlTextureBuffer(const ImageFrame& image_frame);
#endif // !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
// Sets default texture filtering parameters.
void SetStandardTextureParams(GLenum target, GLint internal_format);
// Create the framebuffer for rendering. // Create the framebuffer for rendering.
void CreateFramebuffer(); void CreateFramebuffer();
@ -80,10 +75,6 @@ class GlCalculatorHelperImpl {
GLuint framebuffer_ = 0; GLuint framebuffer_ = 0;
GpuResources& gpu_resources_; GpuResources& gpu_resources_;
// Necessary to compute for a given GlContext in order to properly enforce the
// SetStandardTextureParams.
bool can_linear_filter_float_textures_;
}; };
} // namespace mediapipe } // namespace mediapipe
@ -1,102 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "mediapipe/gpu/gl_calculator_helper_impl.h"
#include "mediapipe/gpu/gpu_shared_data_internal.h"
namespace mediapipe {
// TODO: move this method to GlCalculatorHelper, then we can
// access its framebuffer instead of requiring that one is already set.
template <>
std::unique_ptr<ImageFrame> GlTexture::GetFrame<ImageFrame>() const {
auto output =
absl::make_unique<ImageFrame>(ImageFormat::SRGBA, width_, height_,
ImageFrame::kGlDefaultAlignmentBoundary);
CHECK(helper_impl_);
helper_impl_->ReadTexture(*this, output->MutablePixelData(),
output->PixelDataSize());
return output;
}
template <>
std::unique_ptr<GpuBuffer> GlTexture::GetFrame<GpuBuffer>() const {
#ifdef __EMSCRIPTEN__
// When WebGL is used, the GL context may be spontaneously lost which can
// cause GpuBuffer allocations to fail. In that case, return a dummy buffer
// to allow processing of the current frame to complete.
if (!gpu_buffer_) {
return std::make_unique<GpuBuffer>();
}
#endif // __EMSCRIPTEN__
CHECK(gpu_buffer_);
// Inform the GlTextureBuffer that we have produced new content, and create
// a producer sync point.
gpu_buffer_.GetGlTextureBufferSharedPtr()->Updated(
helper_impl_->GetGlContext().CreateSyncToken());
#ifdef __ANDROID__
// On (some?) Android devices, the texture may need to be explicitly
// detached from the current framebuffer.
// TODO: is this necessary even with the unbind in BindFramebuffer?
// It is not clear if this affected other contexts too, but let's keep it
// while in doubt.
GLint type = GL_NONE;
glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE,
&type);
if (type == GL_TEXTURE) {
GLint color_attachment = 0;
glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME,
&color_attachment);
if (color_attachment == name_) {
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
}
// Some Android drivers log a GL_INVALID_ENUM error after the first
// glGetFramebufferAttachmentParameteriv call if there is no bound object,
// even though it should be ok to ask for the type and get back GL_NONE.
// Let's just ignore any pending errors here.
GLenum error;
while ((error = glGetError()) != GL_NO_ERROR) {
}
#endif // __ANDROID__
return absl::make_unique<GpuBuffer>(gpu_buffer_);
}
void GlTexture::Release() {
if (for_reading_ && gpu_buffer_) {
// Inform the GlTextureBuffer that we have finished accessing its contents,
// and create a consumer sync point.
gpu_buffer_.GetGlTextureBufferSharedPtr()->DidRead(
helper_impl_->GetGlContext().CreateSyncToken());
}
helper_impl_ = nullptr;
for_reading_ = false;
gpu_buffer_ = nullptr;
plane_ = 0;
name_ = 0;
width_ = 0;
height_ = 0;
}
} // namespace mediapipe

View File

@ -25,17 +25,6 @@ GlCalculatorHelperImpl::GlCalculatorHelperImpl(CalculatorContext* cc,
GpuResources* gpu_resources) GpuResources* gpu_resources)
: gpu_resources_(*gpu_resources) { : gpu_resources_(*gpu_resources) {
gl_context_ = gpu_resources_.gl_context(cc); gl_context_ = gpu_resources_.gl_context(cc);
// GL_ES_VERSION_2_0 and up (at least through ES 3.2) may contain the extension.
// Checking against one also checks against higher ES versions. So this checks
// against GLES >= 2.0.
#if GL_ES_VERSION_2_0
// No linear float filtering by default, check extensions.
can_linear_filter_float_textures_ =
gl_context_->HasGlExtension("OES_texture_float_linear");
#else
// Any float32 texture we create should automatically have linear filtering.
can_linear_filter_float_textures_ = true;
#endif // GL_ES_VERSION_2_0
} }
GlCalculatorHelperImpl::~GlCalculatorHelperImpl() { GlCalculatorHelperImpl::~GlCalculatorHelperImpl() {
@ -101,98 +90,59 @@ void GlCalculatorHelperImpl::BindFramebuffer(const GlTexture& dst) {
#endif #endif
} }
void GlCalculatorHelperImpl::SetStandardTextureParams(GLenum target, GlTexture GlCalculatorHelperImpl::MapGpuBuffer(const GpuBuffer& gpu_buffer,
GLint internal_format) { int plane, bool for_reading) {
// Default to using linear filter everywhere. For float32 textures, fall back GlTextureView view = gpu_buffer.GetGlTextureView(plane, for_reading);
// to GL_NEAREST if linear filtering unsupported.
GLint filter;
switch (internal_format) {
case GL_R32F:
case GL_RG32F:
case GL_RGBA32F:
// 32F (unlike 16f) textures do not always support texture filtering
// (According to OpenGL ES specification [TEXTURE IMAGE SPECIFICATION])
filter = can_linear_filter_float_textures_ ? GL_LINEAR : GL_NEAREST;
break;
default:
filter = GL_LINEAR;
}
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, filter);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, filter);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER if (gpu_buffer.format() != GpuBufferFormat::kUnknown) {
GlTexture GlCalculatorHelperImpl::CreateSourceTexture( // TODO: do the params need to be reset here??
const ImageFrame& image_frame) { glBindTexture(view.target(), view.name());
GlTexture texture = MapGlTextureBuffer(MakeGlTextureBuffer(image_frame)); GlTextureInfo info = GlTextureInfoForGpuBufferFormat(
texture.for_reading_ = true; gpu_buffer.format(), view.plane(), GetGlVersion());
return texture; gl_context_->SetStandardTextureParams(view.target(),
info.gl_internal_format);
glBindTexture(view.target(), 0);
}
return GlTexture(std::move(view));
} }
GlTexture GlCalculatorHelperImpl::CreateSourceTexture( GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const GpuBuffer& gpu_buffer) { const GpuBuffer& gpu_buffer) {
GlTexture texture = MapGpuBuffer(gpu_buffer, 0); return MapGpuBuffer(gpu_buffer, 0, true);
texture.for_reading_ = true;
return texture;
} }
GlTexture GlCalculatorHelperImpl::CreateSourceTexture( GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const GpuBuffer& gpu_buffer, int plane) { const GpuBuffer& gpu_buffer, int plane) {
GlTexture texture = MapGpuBuffer(gpu_buffer, plane); return MapGpuBuffer(gpu_buffer, plane, true);
texture.for_reading_ = true;
return texture;
} }
GlTexture GlCalculatorHelperImpl::MapGpuBuffer(const GpuBuffer& gpu_buffer, GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
int plane) {
CHECK_EQ(plane, 0);
return MapGlTextureBuffer(gpu_buffer.GetGlTextureBufferSharedPtr());
}
GlTexture GlCalculatorHelperImpl::MapGlTextureBuffer(
const GlTextureBufferSharedPtr& texture_buffer) {
// Insert wait call to sync with the producer.
texture_buffer->WaitOnGpu();
GlTexture texture;
texture.helper_impl_ = this;
texture.gpu_buffer_ = GpuBuffer(texture_buffer);
texture.plane_ = 0;
texture.width_ = texture_buffer->width_;
texture.height_ = texture_buffer->height_;
texture.target_ = texture_buffer->target_;
texture.name_ = texture_buffer->name_;
if (texture_buffer->format() != GpuBufferFormat::kUnknown) {
// TODO: do the params need to be reset here??
glBindTexture(texture.target(), texture.name());
GlTextureInfo info = GlTextureInfoForGpuBufferFormat(
texture_buffer->format(), texture.plane_, GetGlVersion());
SetStandardTextureParams(texture.target(), info.gl_internal_format);
glBindTexture(texture.target(), 0);
}
return texture;
}
GlTextureBufferSharedPtr GlCalculatorHelperImpl::MakeGlTextureBuffer(
const ImageFrame& image_frame) { const ImageFrame& image_frame) {
CHECK(gl_context_->IsCurrent()); GlTexture texture =
MapGpuBuffer(GpuBuffer::CopyingImageFrame(image_frame), 0, true);
auto buffer = GlTextureBuffer::Create(image_frame); return texture;
}
if (buffer->format_ != GpuBufferFormat::kUnknown) {
glBindTexture(GL_TEXTURE_2D, buffer->name_); template <>
GlTextureInfo info = GlTextureInfoForGpuBufferFormat( std::unique_ptr<ImageFrame> GlTexture::GetFrame<ImageFrame>() const {
buffer->format_, /*plane=*/0, GetGlVersion()); return view_.gpu_buffer().AsImageFrame();
SetStandardTextureParams(buffer->target_, info.gl_internal_format); }
glBindTexture(GL_TEXTURE_2D, 0);
} template <>
std::unique_ptr<GpuBuffer> GlTexture::GetFrame<GpuBuffer>() const {
return buffer; auto gpu_buffer = view_.gpu_buffer();
#ifdef __EMSCRIPTEN__
// When WebGL is used, the GL context may be spontaneously lost which can
// cause GpuBuffer allocations to fail. In that case, return a dummy buffer
// to allow processing of the current frame to complete.
if (!gpu_buffer) {
return std::make_unique<GpuBuffer>();
}
#endif // __EMSCRIPTEN__
view_.DoneWriting();
return absl::make_unique<GpuBuffer>(gpu_buffer);
} }
#endif // !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
GlTexture GlCalculatorHelperImpl::CreateDestinationTexture( GlTexture GlCalculatorHelperImpl::CreateDestinationTexture(
int width, int height, GpuBufferFormat format) { int width, int height, GpuBufferFormat format) {
@ -202,44 +152,9 @@ GlTexture GlCalculatorHelperImpl::CreateDestinationTexture(
GpuBuffer buffer = GpuBuffer buffer =
gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format); gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format);
GlTexture texture = MapGpuBuffer(buffer, 0); GlTexture texture = MapGpuBuffer(buffer, 0, false);
return texture; return texture;
} }
void GlCalculatorHelperImpl::ReadTexture(const GlTexture& texture, void* output,
size_t size) {
CHECK_GE(size, texture.width_ * texture.height_ * 4);
GLint current_fbo;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &current_fbo);
CHECK_NE(current_fbo, 0);
GLint color_attachment_name;
glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME,
&color_attachment_name);
if (color_attachment_name != texture.name_) {
// Save the viewport. Note that we assume that the color attachment is a
// GL_TEXTURE_2D texture.
GLint viewport[4];
glGetIntegerv(GL_VIEWPORT, viewport);
// Set the data from GLTexture object.
glViewport(0, 0, texture.width_, texture.height_);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
texture.target_, texture.name_, 0);
glReadPixels(0, 0, texture.width_, texture.height_, GL_RGBA,
GL_UNSIGNED_BYTE, output);
// Restore from the saved viewport and color attachment name.
glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color_attachment_name, 0);
} else {
glReadPixels(0, 0, texture.width_, texture.height_, GL_RGBA,
GL_UNSIGNED_BYTE, output);
}
}
} // namespace mediapipe } // namespace mediapipe
@ -1,197 +0,0 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/gpu/gl_calculator_helper_impl.h"
#if TARGET_OS_OSX
#import <AppKit/NSOpenGL.h>
#else
#import <OpenGLES/EAGL.h>
#endif // TARGET_OS_OSX
#import <AVFoundation/AVFoundation.h>
#include "absl/memory/memory.h"
#include "mediapipe/gpu/gpu_buffer_multi_pool.h"
#include "mediapipe/gpu/pixel_buffer_pool_util.h"
#include "mediapipe/objc/util.h"
namespace mediapipe {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const mediapipe::ImageFrame& image_frame) {
GlTexture texture;
texture.helper_impl_ = this;
texture.width_ = image_frame.Width();
texture.height_ = image_frame.Height();
auto format = GpuBufferFormatForImageFormat(image_frame.Format());
GlTextureInfo info = GlTextureInfoForGpuBufferFormat(format, 0, GetGlVersion());
glGenTextures(1, &texture.name_);
glBindTexture(GL_TEXTURE_2D, texture.name_);
glTexImage2D(GL_TEXTURE_2D, 0, info.gl_internal_format, texture.width_,
texture.height_, 0, info.gl_format, info.gl_type,
image_frame.PixelData());
SetStandardTextureParams(GL_TEXTURE_2D, info.gl_internal_format);
return texture;
}
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const GpuBuffer& gpu_buffer) {
return MapGpuBuffer(gpu_buffer, 0);
}
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const GpuBuffer& gpu_buffer, int plane) {
return MapGpuBuffer(gpu_buffer, plane);
}
GlTexture GlCalculatorHelperImpl::MapGpuBuffer(
const GpuBuffer& gpu_buffer, int plane) {
CVReturn err;
GlTexture texture;
texture.helper_impl_ = this;
texture.gpu_buffer_ = gpu_buffer;
texture.plane_ = plane;
const GlTextureInfo info =
GlTextureInfoForGpuBufferFormat(gpu_buffer.format(), plane, GetGlVersion());
// When scale is not 1, we still give the nominal size of the image.
texture.width_ = gpu_buffer.width();
texture.height_ = gpu_buffer.height();
#if TARGET_OS_OSX
CVOpenGLTextureRef cv_texture_temp;
err = CVOpenGLTextureCacheCreateTextureFromImage(
kCFAllocatorDefault, gl_context_->cv_texture_cache(), gpu_buffer.GetCVPixelBufferRef(), NULL,
&cv_texture_temp);
NSCAssert(cv_texture_temp && !err,
@"Error at CVOpenGLTextureCacheCreateTextureFromImage %d", err);
texture.cv_texture_.adopt(cv_texture_temp);
texture.target_ = CVOpenGLTextureGetTarget(*texture.cv_texture_);
texture.name_ = CVOpenGLTextureGetName(*texture.cv_texture_);
#else
CVOpenGLESTextureRef cv_texture_temp;
err = CVOpenGLESTextureCacheCreateTextureFromImage(
kCFAllocatorDefault, gl_context_->cv_texture_cache(), gpu_buffer.GetCVPixelBufferRef(), NULL,
GL_TEXTURE_2D, info.gl_internal_format, texture.width_ / info.downscale,
texture.height_ / info.downscale, info.gl_format, info.gl_type, plane,
&cv_texture_temp);
NSCAssert(cv_texture_temp && !err,
@"Error at CVOpenGLESTextureCacheCreateTextureFromImage %d", err);
texture.cv_texture_.adopt(cv_texture_temp);
texture.target_ = CVOpenGLESTextureGetTarget(*texture.cv_texture_);
texture.name_ = CVOpenGLESTextureGetName(*texture.cv_texture_);
#endif // TARGET_OS_OSX
glBindTexture(texture.target(), texture.name());
SetStandardTextureParams(texture.target(), info.gl_internal_format);
return texture;
}
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
template<>
std::unique_ptr<ImageFrame> GlTexture::GetFrame<ImageFrame>() const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
if (gpu_buffer_.GetCVPixelBufferRef()) {
return CreateImageFrameForCVPixelBuffer(gpu_buffer_.GetCVPixelBufferRef());
}
ImageFormat::Format image_format =
ImageFormatForGpuBufferFormat(gpu_buffer_.format());
CHECK(helper_impl_);
GlTextureInfo info =
GlTextureInfoForGpuBufferFormat(gpu_buffer_.format(), plane_, helper_impl_->GetGlVersion());
auto output = absl::make_unique<ImageFrame>(
image_format, width_, height_);
glReadPixels(0, 0, width_, height_, info.gl_format, info.gl_type,
output->MutablePixelData());
return output;
#else
CHECK(gpu_buffer_.format() == GpuBufferFormat::kBGRA32);
auto output =
absl::make_unique<ImageFrame>(ImageFormat::SRGBA, width_, height_,
ImageFrame::kGlDefaultAlignmentBoundary);
CHECK(helper_impl_);
helper_impl_->ReadTexture(*this, output->MutablePixelData(), output->PixelDataSize());
return output;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
}
template<>
std::unique_ptr<GpuBuffer> GlTexture::GetFrame<GpuBuffer>() const {
NSCAssert(gpu_buffer_, @"gpu_buffer_ must be valid");
#if TARGET_IPHONE_SIMULATOR
CVPixelBufferRef pixel_buffer = gpu_buffer_.GetCVPixelBufferRef();
CVReturn err = CVPixelBufferLockBaseAddress(pixel_buffer, 0);
NSCAssert(err == kCVReturnSuccess, @"CVPixelBufferLockBaseAddress failed: %d", err);
OSType pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer);
size_t bytes_per_row = CVPixelBufferGetBytesPerRow(pixel_buffer);
uint8_t* pixel_ptr = static_cast<uint8_t*>(CVPixelBufferGetBaseAddress(pixel_buffer));
if (pixel_format == kCVPixelFormatType_32BGRA) {
// TODO: restore previous framebuffer? Move this to helper so we can
// use BindFramebuffer?
glViewport(0, 0, width_, height_);
glFramebufferTexture2D(
GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target_, name_, 0);
size_t contiguous_bytes_per_row = width_ * 4;
if (bytes_per_row == contiguous_bytes_per_row) {
glReadPixels(0, 0, width_, height_, GL_BGRA, GL_UNSIGNED_BYTE, pixel_ptr);
} else {
std::vector<uint8_t> contiguous_buffer(contiguous_bytes_per_row * height_);
uint8_t* temp_ptr = contiguous_buffer.data();
glReadPixels(0, 0, width_, height_, GL_BGRA, GL_UNSIGNED_BYTE, temp_ptr);
for (int i = 0; i < height_; ++i) {
memcpy(pixel_ptr, temp_ptr, contiguous_bytes_per_row);
temp_ptr += contiguous_bytes_per_row;
pixel_ptr += bytes_per_row;
}
}
} else {
uint32_t format_big = CFSwapInt32HostToBig(pixel_format);
NSLog(@"unsupported pixel format: %.4s", (char*)&format_big);
}
err = CVPixelBufferUnlockBaseAddress(pixel_buffer, 0);
NSCAssert(err == kCVReturnSuccess, @"CVPixelBufferUnlockBaseAddress failed: %d", err);
#endif
return absl::make_unique<GpuBuffer>(gpu_buffer_);
}
void GlTexture::Release() {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
if (*cv_texture_) {
cv_texture_.reset(NULL);
} else if (name_) {
// This is only needed because of the glGenTextures in
// CreateSourceTexture(ImageFrame)... change.
glDeleteTextures(1, &name_);
}
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
helper_impl_ = nullptr;
gpu_buffer_ = nullptr;
plane_ = 0;
name_ = 0;
width_ = 0;
height_ = 0;
}
} // namespace mediapipe
@ -222,6 +222,9 @@ bool GlContext::HasGlExtension(absl::string_view extension) const {
// to work with GL_EXTENSIONS for newer GL versions, so we must maintain both // to work with GL_EXTENSIONS for newer GL versions, so we must maintain both
// variations of this function. // variations of this function.
absl::Status GlContext::GetGlExtensions() { absl::Status GlContext::GetGlExtensions() {
// RET_CHECK logs by default, but here we just want to check the precondition;
// we'll fall back to the alternative implementation for older versions.
RET_CHECK(gl_major_version_ >= 3).SetNoLogging();
gl_extensions_.clear(); gl_extensions_.clear();
// glGetStringi only introduced in GL 3.0+; so we exit out this function if // glGetStringi only introduced in GL 3.0+; so we exit out this function if
// we don't have that function defined, regardless of version number reported. // we don't have that function defined, regardless of version number reported.
@ -330,13 +333,24 @@ absl::Status GlContext::FinishInitialization(bool create_thread) {
LOG(INFO) << "GL version: " << gl_major_version_ << "." << gl_minor_version_ LOG(INFO) << "GL version: " << gl_major_version_ << "." << gl_minor_version_
<< " (" << glGetString(GL_VERSION) << ")"; << " (" << glGetString(GL_VERSION) << ")";
if (gl_major_version_ >= 3) { {
auto status = GetGlExtensions(); auto status = GetGlExtensions();
if (status.ok()) { if (!status.ok()) {
return absl::OkStatus(); status = GetGlExtensionsCompat();
} }
MP_RETURN_IF_ERROR(status);
} }
return GetGlExtensionsCompat();
#if GL_ES_VERSION_2_0 // This actually means "is GLES available".
// No linear float filtering by default, check extensions.
can_linear_filter_float_textures_ =
HasGlExtension("OES_texture_float_linear");
#else
// Desktop GL should always allow linear filtering.
can_linear_filter_float_textures_ = true;
#endif // GL_ES_VERSION_2_0
return absl::OkStatus();
}); });
} }
@ -841,4 +855,25 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format,
return GlTextureInfoForGpuBufferFormat(format, plane, ctx->GetGlVersion()); return GlTextureInfoForGpuBufferFormat(format, plane, ctx->GetGlVersion());
} }
void GlContext::SetStandardTextureParams(GLenum target, GLint internal_format) {
// Default to using linear filter everywhere. For float32 textures, fall back
// to GL_NEAREST if linear filtering unsupported.
GLint filter;
switch (internal_format) {
case GL_R32F:
case GL_RG32F:
case GL_RGBA32F:
// 32F (unlike 16f) textures do not always support texture filtering
// (According to OpenGL ES specification [TEXTURE IMAGE SPECIFICATION])
filter = can_linear_filter_float_textures_ ? GL_LINEAR : GL_NEAREST;
break;
default:
filter = GL_LINEAR;
}
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, filter);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, filter);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
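A hedged sketch of a caller-side use of the now context-owned helper, assuming a GL 3.x / GLES 3 context where GL_RGBA32F is defined; MakeFloat32Texture and its parameters are illustrative, not MediaPipe API.

```
// Hedged sketch (illustrative): allocate a float32 texture and apply the
// shared filtering/wrap parameters; on GLES, GL_LINEAR is only used when
// OES_texture_float_linear was detected during context initialization above.
GLuint MakeFloat32Texture(mediapipe::GlContext& gl_context, int width,
                          int height) {
  GLuint tex = 0;
  glGenTextures(1, &tex);
  glBindTexture(GL_TEXTURE_2D, tex);
  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, width, height, 0, GL_RGBA,
               GL_FLOAT, nullptr);
  gl_context.SetStandardTextureParams(GL_TEXTURE_2D, GL_RGBA32F);
  glBindTexture(GL_TEXTURE_2D, 0);
  return tex;
}
```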
} // namespace mediapipe } // namespace mediapipe
@ -276,6 +276,9 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
}).IgnoreError(); }).IgnoreError();
} }
// Sets default texture filtering parameters.
void SetStandardTextureParams(GLenum target, GLint internal_format);
// These are used for testing specific SyncToken implementations. Do not use // These are used for testing specific SyncToken implementations. Do not use
// outside of tests. // outside of tests.
enum class SyncTokenTypeForTest { enum class SyncTokenTypeForTest {
@ -342,11 +345,11 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
// This wraps a thread_local. // This wraps a thread_local.
static std::weak_ptr<GlContext>& CurrentContext(); static std::weak_ptr<GlContext>& CurrentContext();
static absl::Status SwitchContext(ContextBinding* old_context, static absl::Status SwitchContext(ContextBinding* saved_context,
const ContextBinding& new_context); const ContextBinding& new_context);
absl::Status EnterContext(ContextBinding* previous_context); absl::Status EnterContext(ContextBinding* saved_context);
absl::Status ExitContext(const ContextBinding* previous_context); absl::Status ExitContext(const ContextBinding* saved_context);
void DestroyContext(); void DestroyContext();
bool HasContext() const; bool HasContext() const;
@ -383,7 +386,7 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
static void GetCurrentContextBinding(ContextBinding* binding); static void GetCurrentContextBinding(ContextBinding* binding);
// Makes the context described by new_context current on this thread. // Makes the context described by new_context current on this thread.
static absl::Status SetCurrentContextBinding( static absl::Status SetCurrentContextBinding(
const ContextBinding& new_context); const ContextBinding& new_binding);
// If not null, a dedicated thread used to execute tasks on this context. // If not null, a dedicated thread used to execute tasks on this context.
// Used on Android due to expensive context switching on some configurations. // Used on Android due to expensive context switching on some configurations.
@ -396,6 +399,10 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
// so we should be fine storing the extension pieces as string_view's. // so we should be fine storing the extension pieces as string_view's.
std::set<absl::string_view> gl_extensions_; std::set<absl::string_view> gl_extensions_;
// Used by SetStandardTextureParams. Do we want several of these bools, or a
// better mechanism?
bool can_linear_filter_float_textures_;
// Number of glFinish calls completed on the GL thread. // Number of glFinish calls completed on the GL thread.
// Changes should be guarded by mutex_. However, we use simple atomic // Changes should be guarded by mutex_. However, we use simple atomic
// loads for efficiency on the fast path. // loads for efficiency on the fast path.
@ -85,7 +85,7 @@ GlContext::StatusOrGlContext GlContext::Create(EGLContext share_context,
return std::move(context); return std::move(context);
} }
absl::Status GlContext::CreateContextInternal(EGLContext external_context, absl::Status GlContext::CreateContextInternal(EGLContext share_context,
int gl_version) { int gl_version) {
CHECK(gl_version == 2 || gl_version == 3); CHECK(gl_version == 2 || gl_version == 3);
@ -131,8 +131,7 @@ absl::Status GlContext::CreateContextInternal(EGLContext external_context,
// clang-format on // clang-format on
}; };
context_ = context_ = eglCreateContext(display_, config_, share_context, context_attr);
eglCreateContext(display_, config_, external_context, context_attr);
int error = eglGetError(); int error = eglGetError();
RET_CHECK(context_ != EGL_NO_CONTEXT) RET_CHECK(context_ != EGL_NO_CONTEXT)
<< "Could not create GLES " << gl_version << " context; " << "Could not create GLES " << gl_version << " context; "
@ -149,7 +148,7 @@ absl::Status GlContext::CreateContextInternal(EGLContext external_context,
return absl::OkStatus(); return absl::OkStatus();
} }
absl::Status GlContext::CreateContext(EGLContext external_context) { absl::Status GlContext::CreateContext(EGLContext share_context) {
EGLint major = 0; EGLint major = 0;
EGLint minor = 0; EGLint minor = 0;
@ -163,11 +162,11 @@ absl::Status GlContext::CreateContext(EGLContext external_context) {
LOG(INFO) << "Successfully initialized EGL. Major : " << major LOG(INFO) << "Successfully initialized EGL. Major : " << major
<< " Minor: " << minor; << " Minor: " << minor;
auto status = CreateContextInternal(external_context, 3); auto status = CreateContextInternal(share_context, 3);
if (!status.ok()) { if (!status.ok()) {
LOG(WARNING) << "Creating a context with OpenGL ES 3 failed: " << status; LOG(WARNING) << "Creating a context with OpenGL ES 3 failed: " << status;
LOG(WARNING) << "Fall back on OpenGL ES 2."; LOG(WARNING) << "Fall back on OpenGL ES 2.";
status = CreateContextInternal(external_context, 2); status = CreateContextInternal(share_context, 2);
} }
MP_RETURN_IF_ERROR(status); MP_RETURN_IF_ERROR(status);
@ -36,7 +36,7 @@ class GlContext::DedicatedThread {
DedicatedThread& operator=(DedicatedThread) = delete; DedicatedThread& operator=(DedicatedThread) = delete;
absl::Status Run(GlStatusFunction gl_func); absl::Status Run(GlStatusFunction gl_func);
void RunWithoutWaiting(GlVoidFunction gl_fund); void RunWithoutWaiting(GlVoidFunction gl_func);
bool IsCurrentThread(); bool IsCurrentThread();
@ -175,18 +175,16 @@
mediapipe::GlCalculatorHelper helper; mediapipe::GlCalculatorHelper helper;
helper.InitializeForTest(&gpuData); helper.InitializeForTest(&gpuData);
std::vector<std::pair<int, int>> sizes{ helper.RunInGlContext([&helper] {
{200, 300}, std::vector<std::pair<int, int>> sizes{
{200, 299}, {200, 300}, {200, 299}, {196, 300}, {194, 300}, {193, 300},
{196, 300}, };
{194, 300}, for (const auto& width_height : sizes) {
{193, 300}, mediapipe::GlTexture texture =
}; helper.CreateDestinationTexture(width_height.first, width_height.second);
for (const auto& width_height : sizes) { XCTAssertNotEqual(texture.name(), 0);
mediapipe::GlTexture texture = }
helper.CreateDestinationTexture(width_height.first, width_height.second); });
XCTAssertNotEqual(texture.name(), 0);
}
} }
- (void)testSimpleConversionFromFormat:(OSType)cvPixelFormat { - (void)testSimpleConversionFromFormat:(OSType)cvPixelFormat {
mediapipe/gpu/gpu_buffer.cc (new file)
@ -0,0 +1,260 @@
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/gpu/gl_context.h"
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#include "mediapipe/objc/util.h"
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
namespace mediapipe {
void GlTextureView::Release() {
if (detach_) detach_(*this);
detach_ = nullptr;
gl_context_ = nullptr;
gpu_buffer_ = nullptr;
plane_ = 0;
name_ = 0;
width_ = 0;
height_ = 0;
}
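The detach_ callback is how a view keeps platform resources alive and runs cleanup exactly once. Below is a self-contained sketch of that pattern outside MediaPipe; a shared_ptr stands in for the CFHolder capture used on Apple platforms further down, and the types are illustrative only.

```
#include <functional>
#include <iostream>
#include <memory>

// Minimal stand-in for the GlTextureView detach-callback pattern: whatever the
// lambda captures stays alive until Release() runs it.
struct View {
  std::function<void(View&)> detach;
  void Release() {
    if (detach) detach(*this);
    detach = nullptr;
  }
};

int main() {
  auto resource = std::make_shared<int>(42);
  View view{[resource](View&) { std::cout << "cleanup " << *resource << "\n"; }};
  resource.reset();  // the capture inside `detach` still owns the resource
  view.Release();    // prints "cleanup 42"; the resource is freed afterwards
}
```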
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#if TARGET_OS_OSX
typedef CVOpenGLTextureRef CVTextureType;
#else
typedef CVOpenGLESTextureRef CVTextureType;
#endif // TARGET_OS_OSX
GlTextureView GpuBuffer::GetGlTextureView(int plane, bool for_reading) const {
CVReturn err;
auto gl_context = GlContext::GetCurrent();
CHECK(gl_context);
#if TARGET_OS_OSX
CVTextureType cv_texture_temp;
err = CVOpenGLTextureCacheCreateTextureFromImage(
kCFAllocatorDefault, gl_context->cv_texture_cache(),
GetCVPixelBufferRef(), NULL, &cv_texture_temp);
CHECK(cv_texture_temp && !err)
<< "CVOpenGLTextureCacheCreateTextureFromImage failed: " << err;
CFHolder<CVTextureType> cv_texture;
cv_texture.adopt(cv_texture_temp);
return GlTextureView(
gl_context.get(), CVOpenGLTextureGetTarget(*cv_texture),
CVOpenGLTextureGetName(*cv_texture), width(), height(), *this, plane,
[cv_texture](
mediapipe::GlTextureView&) { /* only retains cv_texture */ });
#else
const GlTextureInfo info = GlTextureInfoForGpuBufferFormat(
format(), plane, gl_context->GetGlVersion());
CVTextureType cv_texture_temp;
err = CVOpenGLESTextureCacheCreateTextureFromImage(
kCFAllocatorDefault, gl_context->cv_texture_cache(),
GetCVPixelBufferRef(), NULL, GL_TEXTURE_2D, info.gl_internal_format,
width() / info.downscale, height() / info.downscale, info.gl_format,
info.gl_type, plane, &cv_texture_temp);
CHECK(cv_texture_temp && !err)
<< "CVOpenGLESTextureCacheCreateTextureFromImage failed: " << err;
CFHolder<CVTextureType> cv_texture;
cv_texture.adopt(cv_texture_temp);
return GlTextureView(
gl_context.get(), CVOpenGLESTextureGetTarget(*cv_texture),
CVOpenGLESTextureGetName(*cv_texture), width(), height(), *this, plane,
[cv_texture](
mediapipe::GlTextureView&) { /* only retains cv_texture */ });
#endif // TARGET_OS_OSX
}
GpuBuffer GpuBuffer::CopyingImageFrame(const ImageFrame& image_frame) {
auto maybe_buffer = CreateCVPixelBufferCopyingImageFrame(image_frame);
// Converts absl::StatusOr to absl::Status since CHECK_OK() currently only
// deals with absl::Status in MediaPipe OSS.
CHECK_OK(maybe_buffer.status());
return GpuBuffer(std::move(maybe_buffer).value());
}
std::unique_ptr<ImageFrame> GpuBuffer::AsImageFrame() const {
CHECK(GetCVPixelBufferRef());
return CreateImageFrameForCVPixelBuffer(GetCVPixelBufferRef());
}
void GlTextureView::DoneWriting() const {
CHECK(gpu_buffer_);
#if TARGET_IPHONE_SIMULATOR
CVPixelBufferRef pixel_buffer = gpu_buffer_.GetCVPixelBufferRef();
CVReturn err = CVPixelBufferLockBaseAddress(pixel_buffer, 0);
CHECK(err == kCVReturnSuccess)
<< "CVPixelBufferLockBaseAddress failed: " << err;
OSType pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer);
size_t bytes_per_row = CVPixelBufferGetBytesPerRow(pixel_buffer);
uint8_t* pixel_ptr =
static_cast<uint8_t*>(CVPixelBufferGetBaseAddress(pixel_buffer));
if (pixel_format == kCVPixelFormatType_32BGRA) {
// TODO: restore previous framebuffer? Move this to helper so we
// can use BindFramebuffer?
glViewport(0, 0, width(), height());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target(),
name(), 0);
size_t contiguous_bytes_per_row = width() * 4;
if (bytes_per_row == contiguous_bytes_per_row) {
glReadPixels(0, 0, width(), height(), GL_BGRA, GL_UNSIGNED_BYTE,
pixel_ptr);
} else {
std::vector<uint8_t> contiguous_buffer(contiguous_bytes_per_row *
height());
uint8_t* temp_ptr = contiguous_buffer.data();
glReadPixels(0, 0, width(), height(), GL_BGRA, GL_UNSIGNED_BYTE,
temp_ptr);
for (int i = 0; i < height(); ++i) {
memcpy(pixel_ptr, temp_ptr, contiguous_bytes_per_row);
temp_ptr += contiguous_bytes_per_row;
pixel_ptr += bytes_per_row;
}
}
} else {
LOG(ERROR) << "unsupported pixel format: " << pixel_format;
}
err = CVPixelBufferUnlockBaseAddress(pixel_buffer, 0);
CHECK(err == kCVReturnSuccess)
<< "CVPixelBufferUnlockBaseAddress failed: " << err;
#endif
}
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
GlTextureView GpuBuffer::GetGlTextureView(int plane, bool for_reading) const {
auto gl_context = GlContext::GetCurrent();
CHECK(gl_context);
const GlTextureBufferSharedPtr& texture_buffer =
GetGlTextureBufferSharedPtr();
// Insert wait call to sync with the producer.
texture_buffer->WaitOnGpu();
CHECK_EQ(plane, 0);
GlTextureView::DetachFn detach;
if (for_reading) {
detach = [](mediapipe::GlTextureView& texture) {
// Inform the GlTextureBuffer that we have finished accessing its
// contents, and create a consumer sync point.
texture.gpu_buffer().GetGlTextureBufferSharedPtr()->DidRead(
texture.gl_context()->CreateSyncToken());
};
}
return GlTextureView(gl_context.get(), texture_buffer->target(),
texture_buffer->name(), width(), height(), *this, plane,
std::move(detach));
}
GpuBuffer GpuBuffer::CopyingImageFrame(const ImageFrame& image_frame) {
auto gl_context = GlContext::GetCurrent();
CHECK(gl_context);
auto buffer = GlTextureBuffer::Create(image_frame);
// TODO: does this need to set the texture params? We set them again when the
// texture is actually accessed via GlTexture[View]. Or should they always be
// set on creation?
if (buffer->format() != GpuBufferFormat::kUnknown) {
glBindTexture(GL_TEXTURE_2D, buffer->name());
GlTextureInfo info = GlTextureInfoForGpuBufferFormat(
buffer->format(), /*plane=*/0, gl_context->GetGlVersion());
gl_context->SetStandardTextureParams(buffer->target(),
info.gl_internal_format);
glBindTexture(GL_TEXTURE_2D, 0);
}
return GpuBuffer(std::move(buffer));
}
static void ReadTexture(const GlTextureView& view, void* output, size_t size) {
// TODO: check buffer size? We could use glReadnPixels where available
// (OpenGL ES 3.2, i.e. nowhere). Note that, to fully check that the read
// won't overflow the buffer with glReadPixels, we'd also need to check or
// reset several glPixelStore parameters (e.g. what if someone had the
// ill-advised idea of setting GL_PACK_SKIP_PIXELS?).
CHECK(view.gl_context());
GlTextureInfo info =
GlTextureInfoForGpuBufferFormat(view.gpu_buffer().format(), view.plane(),
view.gl_context()->GetGlVersion());
GLint current_fbo;
glGetIntegerv(GL_FRAMEBUFFER_BINDING, &current_fbo);
CHECK_NE(current_fbo, 0);
GLint color_attachment_name;
glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME,
&color_attachment_name);
if (color_attachment_name != view.name()) {
// Save the viewport. Note that we assume that the color attachment is a
// GL_TEXTURE_2D texture.
GLint viewport[4];
glGetIntegerv(GL_VIEWPORT, viewport);
// Set the data from GLTextureView object.
glViewport(0, 0, view.width(), view.height());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, view.target(),
view.name(), 0);
glReadPixels(0, 0, view.width(), view.height(), info.gl_format,
info.gl_type, output);
// Restore from the saved viewport and color attachment name.
glViewport(viewport[0], viewport[1], viewport[2], viewport[3]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color_attachment_name, 0);
} else {
glReadPixels(0, 0, view.width(), view.height(), info.gl_format,
info.gl_type, output);
}
}
std::unique_ptr<ImageFrame> GpuBuffer::AsImageFrame() const {
ImageFormat::Format image_format = ImageFormatForGpuBufferFormat(format());
auto output = absl::make_unique<ImageFrame>(
image_format, width(), height(), ImageFrame::kGlDefaultAlignmentBoundary);
auto view = GetGlTextureView(0, true);
ReadTexture(view, output->MutablePixelData(), output->PixelDataSize());
return output;
}
void GlTextureView::DoneWriting() const {
CHECK(gpu_buffer_);
// Inform the GlTextureBuffer that we have produced new content, and create
// a producer sync point.
gpu_buffer_.GetGlTextureBufferSharedPtr()->Updated(
gl_context()->CreateSyncToken());
#ifdef __ANDROID__
// On (some?) Android devices, the texture may need to be explicitly
// detached from the current framebuffer.
// TODO: is this necessary even with the unbind in BindFramebuffer?
// It is not clear if this affected other contexts too, but let's keep it
// while in doubt.
GLint type = GL_NONE;
glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE,
&type);
if (type == GL_TEXTURE) {
GLint color_attachment = 0;
glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME,
&color_attachment);
if (color_attachment == name()) {
glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
}
// Some Android drivers log a GL_INVALID_ENUM error after the first
// glGetFramebufferAttachmentParameteriv call if there is no bound object,
// even though it should be ok to ask for the type and get back GL_NONE.
// Let's just ignore any pending errors here.
GLenum error;
while ((error = glGetError()) != GL_NO_ERROR) {
}
#endif // __ANDROID__
}
#endif // !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
} // namespace mediapipe

View File

@ -17,6 +17,7 @@
#include <utility> #include <utility>
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/gpu/gl_base.h" #include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gpu_buffer_format.h" #include "mediapipe/gpu/gpu_buffer_format.h"
@ -32,6 +33,9 @@
namespace mediapipe { namespace mediapipe {
class GlContext;
class GlTextureView;
// This class wraps a platform-specific buffer of GPU data. // This class wraps a platform-specific buffer of GPU data.
// An instance of GpuBuffer acts as an opaque reference to the underlying // An instance of GpuBuffer acts as an opaque reference to the underlying
// data object. // data object.
@ -84,6 +88,19 @@ class GpuBuffer {
// Allow assignment from nullptr. // Allow assignment from nullptr.
GpuBuffer& operator=(std::nullptr_t other); GpuBuffer& operator=(std::nullptr_t other);
// TODO: split into read and write, remove const from write.
GlTextureView GetGlTextureView(int plane, bool for_reading) const;
// Make a GpuBuffer copying the data from an ImageFrame.
static GpuBuffer CopyingImageFrame(const ImageFrame& image_frame);
// Make an ImageFrame, possibly sharing the same data. The data is shared if
// the GpuBuffer's storage supports memory sharing; otherwise, it is copied.
// In order to work correctly across platforms, callers should always treat
// the returned ImageFrame as if it shares memory with the GpuBuffer, i.e.
// treat it as immutable if the GpuBuffer must not be modified.
std::unique_ptr<ImageFrame> AsImageFrame() const;
private: private:
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
CFHolder<CVPixelBufferRef> pixel_buffer_; CFHolder<CVPixelBufferRef> pixel_buffer_;
@ -92,6 +109,51 @@ class GpuBuffer {
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
}; };
class GlTextureView {
public:
GlTextureView() {}
~GlTextureView() { Release(); }
// TODO: make this class move-only.
GlContext* gl_context() const { return gl_context_; }
int width() const { return width_; }
int height() const { return height_; }
GLenum target() const { return target_; }
GLuint name() const { return name_; }
const GpuBuffer& gpu_buffer() const { return gpu_buffer_; }
int plane() const { return plane_; }
private:
friend class GpuBuffer;
using DetachFn = std::function<void(GlTextureView&)>;
GlTextureView(GlContext* context, GLenum target, GLuint name, int width,
int height, GpuBuffer gpu_buffer, int plane, DetachFn detach)
: gl_context_(context),
target_(target),
name_(name),
width_(width),
height_(height),
gpu_buffer_(std::move(gpu_buffer)),
plane_(plane),
detach_(std::move(detach)) {}
// TODO: remove this friend declaration.
friend class GlTexture;
void Release();
// TODO: make this non-const.
void DoneWriting() const;
GlContext* gl_context_ = nullptr;
GLenum target_ = GL_TEXTURE_2D;
GLuint name_ = 0;
// Note: when scale is not 1, we still give the nominal size of the image.
int width_ = 0;
int height_ = 0;
GpuBuffer gpu_buffer_;
int plane_ = 0;
DetachFn detach_;
};
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
inline int GpuBuffer::width() const { inline int GpuBuffer::width() const {

View File

@ -21,10 +21,11 @@
#include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/logging.h"
#include "mediapipe/gpu/gpu_shared_data_internal.h" #include "mediapipe/gpu/gpu_shared_data_internal.h"
#ifdef __APPLE__ #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#include "CoreFoundation/CFBase.h" #include "CoreFoundation/CFBase.h"
#include "mediapipe/objc/CFHolder.h" #include "mediapipe/objc/CFHolder.h"
#endif // __APPLE__ #include "mediapipe/objc/util.h"
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
namespace mediapipe { namespace mediapipe {

View File

@ -63,11 +63,6 @@ CVReturn CreateCVPixelBufferWithPool(CVPixelBufferPoolRef pool,
CFDictionaryRef CreateCVPixelBufferPoolAuxiliaryAttributesForThreshold( CFDictionaryRef CreateCVPixelBufferPoolAuxiliaryAttributesForThreshold(
int allocationThreshold); int allocationThreshold);
// Create a CVPixelBuffer without using a pool.
CVReturn CreateCVPixelBufferWithoutPool(int width, int height,
OSType pixelFormat,
CVPixelBufferRef* outBuffer);
} // namespace mediapipe } // namespace mediapipe
#endif // MEDIAPIPE_GPU_PIXEL_BUFFER_POOL_UTIL_H_ #endif // MEDIAPIPE_GPU_PIXEL_BUFFER_POOL_UTIL_H_

View File

@ -121,33 +121,4 @@ CVReturn CreateCVPixelBufferWithPool(
return err; return err;
} }
#if TARGET_IPHONE_SIMULATOR
static void FreeRefConReleaseCallback(void* refCon, const void* baseAddress) {
free(refCon);
}
#endif
CVReturn CreateCVPixelBufferWithoutPool(
int width, int height, OSType pixelFormat, CVPixelBufferRef* outBuffer) {
#if TARGET_IPHONE_SIMULATOR
// On the simulator, syncing the texture with the pixelbuffer does not work,
// and we have to use glReadPixels. Since GL_UNPACK_ROW_LENGTH is not
// available in OpenGL ES 2, we should create the buffer so the pixels are
// contiguous.
//
// TODO: verify if we can use kIOSurfaceBytesPerRow to force
// CoreVideo to give us contiguous data.
size_t bytes_per_row = width * 4;
void* data = malloc(bytes_per_row * height);
return CVPixelBufferCreateWithBytes(
kCFAllocatorDefault, width, height, pixelFormat, data, bytes_per_row,
FreeRefConReleaseCallback, data, GetCVPixelBufferAttributesForGlCompatibility(),
outBuffer);
#else
return CVPixelBufferCreate(
kCFAllocatorDefault, width, height, pixelFormat,
GetCVPixelBufferAttributesForGlCompatibility(), outBuffer);
#endif
}
} // namespace mediapipe } // namespace mediapipe

View File

@ -28,7 +28,7 @@ namespace mediapipe {
namespace { namespace {
constexpr int kNumFaceLandmarkConnections = 124; constexpr int kNumFaceLandmarkConnections = 132;
// Pairs of landmark indices to be rendered with connections. // Pairs of landmark indices to be rendered with connections.
constexpr int kFaceLandmarkConnections[] = { constexpr int kFaceLandmarkConnections[] = {
// Lips. // Lips.
@ -43,6 +43,8 @@ constexpr int kFaceLandmarkConnections[] = {
133, 133,
// Left eyebrow. // Left eyebrow.
46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107, 46, 53, 53, 52, 52, 65, 65, 55, 70, 63, 63, 105, 105, 66, 66, 107,
// Left iris.
474, 475, 475, 476, 476, 477, 477, 474,
// Right eye. // Right eye.
263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382, 263, 249, 249, 390, 390, 373, 373, 374, 374, 380, 380, 381, 381, 382, 382,
362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398, 362, 263, 466, 466, 388, 388, 387, 387, 386, 386, 385, 385, 384, 384, 398,
@ -50,6 +52,8 @@ constexpr int kFaceLandmarkConnections[] = {
// Right eyebrow. // Right eyebrow.
276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296, 276, 283, 283, 282, 282, 295, 295, 285, 300, 293, 293, 334, 334, 296, 296,
336, 336,
// Right iris.
469, 470, 470, 471, 471, 472, 472, 469,
// Face oval. // Face oval.
10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356, 10, 338, 338, 297, 297, 332, 332, 284, 284, 251, 251, 389, 389, 356, 356,
454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378, 454, 454, 323, 323, 361, 361, 288, 288, 397, 397, 365, 365, 379, 379, 378,

View File

@ -22,10 +22,12 @@ node {
# Defines side packets for further use in the graph. # Defines side packets for further use in the graph.
node { node {
calculator: "ConstantSidePacketCalculator" calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces" output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: { node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 } packet { int_value: 1 }
packet { bool_value: true }
} }
} }
} }
@ -35,6 +37,7 @@ node {
calculator: "FaceLandmarkFrontCpu" calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:input_video" input_stream: "IMAGE:input_video"
input_side_packet: "NUM_FACES:num_faces" input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections" output_stream: "DETECTIONS:face_detections"

View File

@ -33,10 +33,12 @@ node {
# Defines side packets for further use in the graph. # Defines side packets for further use in the graph.
node { node {
calculator: "ConstantSidePacketCalculator" calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces" output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: { node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 } packet { int_value: 1 }
packet { bool_value: true }
} }
} }
} }
@ -46,6 +48,7 @@ node {
calculator: "FaceLandmarkFrontCpu" calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:throttled_input_video" input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces" input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections" output_stream: "DETECTIONS:face_detections"

View File

@ -33,10 +33,12 @@ node {
# Defines side packets for further use in the graph. # Defines side packets for further use in the graph.
node { node {
calculator: "ConstantSidePacketCalculator" calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:num_faces" output_side_packet: "PACKET:0:num_faces"
output_side_packet: "PACKET:1:with_attention"
node_options: { node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { int_value: 1 } packet { int_value: 1 }
packet { bool_value: true }
} }
} }
} }
@ -46,6 +48,7 @@ node {
calculator: "FaceLandmarkFrontGpu" calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video" input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces" input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections" output_stream: "DETECTIONS:face_detections"

View File

@ -33,11 +33,23 @@ node {
output_stream: "throttled_input_video" output_stream: "throttled_input_video"
} }
# Defines side packets for further use in the graph.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:with_attention"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: true }
}
}
}
# Subgraph that detects faces and corresponding landmarks. # Subgraph that detects faces and corresponding landmarks.
node { node {
calculator: "FaceLandmarkFrontGpu" calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:throttled_input_video" input_stream: "IMAGE:throttled_input_video"
input_side_packet: "NUM_FACES:num_faces" input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "DETECTIONS:face_detections" output_stream: "DETECTIONS:face_detections"

View File

@ -511,7 +511,7 @@ public class ExternalTextureConverter implements TextureFrameProducer {
frame.getHeight(), frame.getHeight(),
frame.getTimestamp())); frame.getTimestamp()));
} }
frame.waitUntilReleased(); frame.waitUntilReleasedWithGpuSync();
if (Log.isLoggable(TAG, Log.VERBOSE)) { if (Log.isLoggable(TAG, Log.VERBOSE)) {
Log.v( Log.v(
TAG, TAG,

View File

@ -66,7 +66,9 @@ public class AppTextureFrame implements TextureFrame {
/** /**
* Waits until the consumer is done with the texture. * Waits until the consumer is done with the texture.
* @throws InterruptedException *
* <p>This does a CPU wait for the texture to be complete.
* Use {@link waitUntilReleasedWithGpuSync} whenever possible.
*/ */
public void waitUntilReleased() throws InterruptedException { public void waitUntilReleased() throws InterruptedException {
synchronized (this) { synchronized (this) {
@ -82,6 +84,26 @@ public class AppTextureFrame implements TextureFrame {
} }
} }
/**
* Waits until the consumer is done with the texture.
*
* <p>This method must be called within the application's GL context that will overwrite the
* TextureFrame.
*/
public void waitUntilReleasedWithGpuSync() throws InterruptedException {
synchronized (this) {
while (inUse && releaseSyncToken == null) {
wait();
}
if (releaseSyncToken != null) {
releaseSyncToken.waitOnGpu();
releaseSyncToken.release();
inUse = false;
releaseSyncToken = null;
}
}
}
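A minimal usage sketch of the GPU-synchronized release path added above, assuming AppTextureFrame lives in the com.google.mediapipe.framework package as the rest of the framework Java library; the FrameRecycler wrapper is hypothetical application code:

import com.google.mediapipe.framework.AppTextureFrame;

/** Hypothetical helper that recycles an AppTextureFrame for reuse by its producer. */
final class FrameRecycler {
  /**
   * Blocks until the consumer is done with {@code frame}. Must be called on the
   * application GL context that will overwrite the frame's texture.
   */
  static void recycle(AppTextureFrame frame) throws InterruptedException {
    // Unlike waitUntilReleased(), this waits on the consumer's release sync token
    // on the GPU instead of stalling the CPU.
    frame.waitUntilReleasedWithGpuSync();
    // The texture held by the frame can now be rendered into again.
  }
}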
/** /**
* Returns whether the texture is currently in use. * Returns whether the texture is currently in use.
* *

View File

@ -37,9 +37,18 @@ public class GraphTextureFrame implements TextureFrame {
this.timestamp = timestamp; this.timestamp = timestamp;
} }
/** Returns the name of the underlying OpenGL texture. */ /**
* Returns the name of the underlying OpenGL texture.
*
* <p>Note: if this texture has been obtained using getTextureFrameDeferredSync, a GPU wait on the
* producer sync will be done here. That means this method should be called on the GL context that
* will actually use the texture.
*/
@Override @Override
public int getTextureName() { public int getTextureName() {
// Note that, if a CPU wait has already been done, the sync point will have been
// cleared and this will turn into a no-op. See GlFenceSyncPoint::Wait.
nativeGpuWait(nativeBufferHandle);
return textureName; return textureName;
} }
@ -92,4 +101,6 @@ public class GraphTextureFrame implements TextureFrame {
private native int nativeGetTextureName(long nativeHandle); private native int nativeGetTextureName(long nativeHandle);
private native int nativeGetWidth(long nativeHandle); private native int nativeGetWidth(long nativeHandle);
private native int nativeGetHeight(long nativeHandle); private native int nativeGetHeight(long nativeHandle);
private native void nativeGpuWait(long nativeHandle);
} }

View File

@ -288,7 +288,18 @@ public final class PacketGetter {
*/ */
public static GraphTextureFrame getTextureFrame(final Packet packet) { public static GraphTextureFrame getTextureFrame(final Packet packet) {
return new GraphTextureFrame( return new GraphTextureFrame(
nativeGetGpuBuffer(packet.getNativeHandle()), packet.getTimestamp()); nativeGetGpuBuffer(packet.getNativeHandle(), /* waitOnCpu= */ true), packet.getTimestamp());
}
/**
* Works like {@link #getTextureFrame(Packet)}, but does not insert a CPU wait for the texture's
* producer before returning. Instead, a GPU wait will automatically occur when
* GraphTextureFrame#getTextureName is called.
*/
public static GraphTextureFrame getTextureFrameDeferredSync(final Packet packet) {
return new GraphTextureFrame(
nativeGetGpuBuffer(packet.getNativeHandle(), /* waitOnCpu= */ false),
packet.getTimestamp());
} }
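A sketch of the deferred-sync path added above: no CPU wait happens when the frame is extracted from the packet, and the GPU wait is performed by getTextureName() on the GL context that samples the texture. The DeferredSyncExample class, the glThreadExecutor, and drawTexture() are hypothetical application code:

import com.google.mediapipe.framework.GraphTextureFrame;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.PacketGetter;
import java.util.concurrent.Executor;

final class DeferredSyncExample {
  static void onGpuPacket(Packet packet, Executor glThreadExecutor) {
    // No CPU wait here; the producer sync point stays attached to the frame,
    // which survives independently of the packet.
    GraphTextureFrame frame = PacketGetter.getTextureFrameDeferredSync(packet);
    glThreadExecutor.execute(
        () -> {
          // getTextureName() performs the GPU wait, so it must run on the GL
          // context that actually uses the texture.
          int textureName = frame.getTextureName();
          drawTexture(textureName, frame.getWidth(), frame.getHeight());
          frame.release();
        });
  }

  private static void drawTexture(int textureName, int width, int height) {
    // Hypothetical rendering code.
  }
}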
private static native long nativeGetPacketFromReference(long nativePacketHandle); private static native long nativeGetPacketFromReference(long nativePacketHandle);
@ -356,7 +367,7 @@ public final class PacketGetter {
private static native int nativeGetGpuBufferName(long nativePacketHandle); private static native int nativeGetGpuBufferName(long nativePacketHandle);
private static native long nativeGetGpuBuffer(long nativePacketHandle); private static native long nativeGetGpuBuffer(long nativePacketHandle, boolean waitOnCpu);
private PacketGetter() {} private PacketGetter() {}
} }

View File

@ -123,6 +123,7 @@ cc_library(
"//mediapipe/gpu:gpu_buffer", "//mediapipe/gpu:gpu_buffer",
"//mediapipe/gpu:gpu_shared_data_internal", "//mediapipe/gpu:gpu_shared_data_internal",
"//mediapipe/gpu:graph_support", "//mediapipe/gpu:graph_support",
"//mediapipe/gpu:egl_surface_holder",
], ],
"//mediapipe/gpu:disable_gpu": [ "//mediapipe/gpu:disable_gpu": [
"//mediapipe/gpu:gpu_shared_data_internal", "//mediapipe/gpu:gpu_shared_data_internal",

View File

@ -34,6 +34,13 @@ JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetTextureName)(
return (*buffer)->name(); return (*buffer)->name();
} }
JNIEXPORT void JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGpuWait)(
JNIEnv* env, jobject thiz, jlong nativeHandle) {
GlTextureBufferSharedPtr* buffer =
reinterpret_cast<GlTextureBufferSharedPtr*>(nativeHandle);
(*buffer)->WaitOnGpu();
}
JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetWidth)( JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetWidth)(
JNIEnv* env, jobject thiz, jlong nativeHandle) { JNIEnv* env, jobject thiz, jlong nativeHandle) {
GlTextureBufferSharedPtr* buffer = GlTextureBufferSharedPtr* buffer =

View File

@ -31,6 +31,9 @@ JNIEXPORT void JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeReleaseBuffer)(
JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetTextureName)( JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetTextureName)(
JNIEnv* env, jobject thiz, jlong nativeHandle); JNIEnv* env, jobject thiz, jlong nativeHandle);
JNIEXPORT void JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGpuWait)(
JNIEnv* env, jobject thiz, jlong nativeHandle);
JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetWidth)( JNIEXPORT jint JNICALL GRAPH_TEXTURE_FRAME_METHOD(nativeGetWidth)(
JNIEnv* env, jobject thiz, jlong nativeHandle); JNIEnv* env, jobject thiz, jlong nativeHandle);

View File

@ -437,9 +437,8 @@ JNIEXPORT jint JNICALL PACKET_GETTER_METHOD(nativeGetGpuBufferName)(
return static_cast<jint>(gpu_buffer.GetGlTextureBufferSharedPtr()->name()); return static_cast<jint>(gpu_buffer.GetGlTextureBufferSharedPtr()->name());
} }
JNIEXPORT jlong JNICALL PACKET_GETTER_METHOD(nativeGetGpuBuffer)(JNIEnv* env, JNIEXPORT jlong JNICALL PACKET_GETTER_METHOD(nativeGetGpuBuffer)(
jobject thiz, JNIEnv* env, jobject thiz, jlong packet, jboolean wait_on_cpu) {
jlong packet) {
mediapipe::Packet mediapipe_packet = mediapipe::Packet mediapipe_packet =
mediapipe::android::Graph::GetPacketFromHandle(packet); mediapipe::android::Graph::GetPacketFromHandle(packet);
mediapipe::GlTextureBufferSharedPtr ptr; mediapipe::GlTextureBufferSharedPtr ptr;
@ -459,7 +458,9 @@ JNIEXPORT jlong JNICALL PACKET_GETTER_METHOD(nativeGetGpuBuffer)(JNIEnv* env,
mediapipe_packet.Get<mediapipe::GpuBuffer>(); mediapipe_packet.Get<mediapipe::GpuBuffer>();
ptr = buffer.GetGlTextureBufferSharedPtr(); ptr = buffer.GetGlTextureBufferSharedPtr();
} }
ptr->WaitUntilComplete(); if (wait_on_cpu) {
ptr->WaitUntilComplete();
}
return reinterpret_cast<intptr_t>( return reinterpret_cast<intptr_t>(
new mediapipe::GlTextureBufferSharedPtr(ptr)); new mediapipe::GlTextureBufferSharedPtr(ptr));
} }

View File

@ -154,9 +154,8 @@ JNIEXPORT jint JNICALL PACKET_GETTER_METHOD(nativeGetGpuBufferName)(
// Returns a mediapipe::GlTextureBufferSharedPtr*. // Returns a mediapipe::GlTextureBufferSharedPtr*.
// This will survive independently of the packet. // This will survive independently of the packet.
JNIEXPORT jlong JNICALL PACKET_GETTER_METHOD(nativeGetGpuBuffer)(JNIEnv* env, JNIEXPORT jlong JNICALL PACKET_GETTER_METHOD(nativeGetGpuBuffer)(
jobject thiz, JNIEnv* env, jobject thiz, jlong packet, jboolean wait_on_cpu);
jlong packet);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"

View File

@ -22,7 +22,6 @@ android_library(
["*.java"], ["*.java"],
exclude = [ exclude = [
"CameraInput.java", "CameraInput.java",
"ResultGlBoundary.java",
"ResultGlRenderer.java", "ResultGlRenderer.java",
"SolutionGlSurfaceView.java", "SolutionGlSurfaceView.java",
"SolutionGlSurfaceViewRenderer.java", "SolutionGlSurfaceViewRenderer.java",
@ -67,7 +66,6 @@ android_library(
android_library( android_library(
name = "solution_rendering", name = "solution_rendering",
srcs = [ srcs = [
"ResultGlBoundary.java",
"ResultGlRenderer.java", "ResultGlRenderer.java",
"SolutionGlSurfaceView.java", "SolutionGlSurfaceView.java",
"SolutionGlSurfaceViewRenderer.java", "SolutionGlSurfaceViewRenderer.java",
@ -78,7 +76,6 @@ android_library(
"//mediapipe/java/com/google/mediapipe/components:android_components", "//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework", "//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil", "//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:autovalue",
"@maven//:com_google_guava_guava", "@maven//:com_google_guava_guava",
], ],
) )
@ -91,6 +88,8 @@ cc_binary(
# TODO: Add more calculators to support other top-level solutions. # TODO: Add more calculators to support other top-level solutions.
deps = [ deps = [
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
"//mediapipe/modules/face_detection:face_detection_full_range_image",
"//mediapipe/modules/face_detection:face_detection_short_range_image",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu_image", "//mediapipe/modules/face_landmark:face_landmark_front_cpu_image",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu_image", "//mediapipe/modules/face_landmark:face_landmark_front_gpu_image",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image", "//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image",

View File

@ -54,7 +54,7 @@ public class ImageSolutionBase extends SolutionBase {
eglManager = new EglManager(/*parentContext=*/ null); eglManager = new EglManager(/*parentContext=*/ null);
solutionGraph.setParentGlContext(eglManager.getNativeContext()); solutionGraph.setParentGlContext(eglManager.getNativeContext());
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
throwException("Error occurs when creating MediaPipe image solution graph. ", e); reportError("Error occurs while creating MediaPipe image solution graph.", e);
} }
} }
@ -72,8 +72,8 @@ public class ImageSolutionBase extends SolutionBase {
/** Sends a {@link TextureFrame} into solution graph for processing. */ /** Sends a {@link TextureFrame} into solution graph for processing. */
public void send(TextureFrame textureFrame) { public void send(TextureFrame textureFrame) {
if (!staticImageMode && textureFrame.getTimestamp() == Long.MIN_VALUE) { if (!staticImageMode && textureFrame.getTimestamp() == Long.MIN_VALUE) {
throwException( reportError(
"Error occurs when calling the solution send method. ", "Error occurs while calling the MediaPipe solution send method.",
new MediaPipeException( new MediaPipeException(
MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(), MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
"TextureFrame's timestamp needs to be explicitly set if not in static image mode.")); "TextureFrame's timestamp needs to be explicitly set if not in static image mode."));
@ -98,8 +98,8 @@ public class ImageSolutionBase extends SolutionBase {
/** Sends a {@link Bitmap} (static image) into solution graph for processing. */ /** Sends a {@link Bitmap} (static image) into solution graph for processing. */
public void send(Bitmap inputBitmap) { public void send(Bitmap inputBitmap) {
if (!staticImageMode) { if (!staticImageMode) {
throwException( reportError(
"Error occurs when calling the solution send method. ", "Error occurs while calling the solution send method.",
new MediaPipeException( new MediaPipeException(
MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(), MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
"When not in static image mode, a timestamp associated with the image is required." "When not in static image mode, a timestamp associated with the image is required."
@ -112,7 +112,7 @@ public class ImageSolutionBase extends SolutionBase {
/** Internal implementation of sending Bitmap/TextureFrame into the MediaPipe solution. */ /** Internal implementation of sending Bitmap/TextureFrame into the MediaPipe solution. */
private synchronized <T> void sendImage(T imageObj, long timestamp) { private synchronized <T> void sendImage(T imageObj, long timestamp) {
if (lastTimestamp >= timestamp) { if (lastTimestamp >= timestamp) {
throwException( reportError(
"The received frame having a smaller timestamp than the processed timestamp.", "The received frame having a smaller timestamp than the processed timestamp.",
new MediaPipeException( new MediaPipeException(
MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(), MediaPipeException.StatusCode.FAILED_PRECONDITION.ordinal(),
@ -123,7 +123,7 @@ public class ImageSolutionBase extends SolutionBase {
if (imageObj instanceof TextureFrame) { if (imageObj instanceof TextureFrame) {
((TextureFrame) imageObj).release(); ((TextureFrame) imageObj).release();
} }
throwException( reportError(
"The solution graph hasn't been successfully started or error occurs during graph" "The solution graph hasn't been successfully started or error occurs during graph"
+ " initializaton.", + " initializaton.",
new MediaPipeException( new MediaPipeException(
@ -140,8 +140,8 @@ public class ImageSolutionBase extends SolutionBase {
} else if (imageObj instanceof Bitmap) { } else if (imageObj instanceof Bitmap) {
imagePacket = packetCreator.createRgbaImage((Bitmap) imageObj); imagePacket = packetCreator.createRgbaImage((Bitmap) imageObj);
} else { } else {
throwException( reportError(
"The input image type is not supported. ", "The input image type is not supported.",
new MediaPipeException( new MediaPipeException(
MediaPipeException.StatusCode.UNIMPLEMENTED.ordinal(), MediaPipeException.StatusCode.UNIMPLEMENTED.ordinal(),
"The input image type is not supported.")); "The input image type is not supported."));
@ -164,7 +164,7 @@ public class ImageSolutionBase extends SolutionBase {
} }
} catch (RuntimeException e) { } catch (RuntimeException e) {
if (errorListener != null) { if (errorListener != null) {
errorListener.onError("Mediapipe error: ", e); errorListener.onError("MediaPipe packet creation error: " + e.getMessage(), e);
} else { } else {
throw e; throw e;
} }

View File

@ -33,6 +33,8 @@ public class OutputHandler<T extends SolutionResult> {
private ResultListener<T> customResultListener; private ResultListener<T> customResultListener;
// The user-defined error listener. // The user-defined error listener.
private ErrorListener customErrorListener; private ErrorListener customErrorListener;
// Whether the output handler should react to timestamp-bound changes by outputting empty packets.
private boolean handleTimestampBoundChanges = false;
/** /**
* Sets a callback to be invoked to convert a packet list to a solution result object. * Sets a callback to be invoked to convert a packet list to a solution result object.
@ -61,6 +63,20 @@ public class OutputHandler<T extends SolutionResult> {
this.customErrorListener = listener; this.customErrorListener = listener;
} }
/**
* Sets whether the output handler should react to timestamp-bound changes by outputting empty
* packets.
*
* @param handleTimestampBoundChanges a boolean value.
*/
public void setHandleTimestampBoundChanges(boolean handleTimestampBoundChanges) {
this.handleTimestampBoundChanges = handleTimestampBoundChanges;
}
public boolean handleTimestampBoundChanges() {
return handleTimestampBoundChanges;
}
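A minimal sketch of opting a solution into timestamp-bound handling; SolutionResult is assumed to be the solutioncore base result type referenced by OutputHandler's type bound:

import com.google.mediapipe.solutioncore.OutputHandler;
import com.google.mediapipe.solutioncore.SolutionResult;

final class TimestampBoundExample {
  static <T extends SolutionResult> OutputHandler<T> enableEmptyOutputs(OutputHandler<T> handler) {
    // With this enabled, SolutionBase registers its multi-stream callback with
    // observeTimestampBounds=true, so the handler also runs when only the
    // timestamp bound advances (e.g. a frame with no detections) and the
    // output packets arrive empty.
    handler.setHandleTimestampBoundChanges(true);
    return handler;
  }
}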
/** Handles a list of output packets. Invoked when packet lists become available. */ /** Handles a list of output packets. Invoked when packet lists become available. */
public void run(List<Packet> packets) { public void run(List<Packet> packets) {
T solutionResult = null; T solutionResult = null;

View File

@ -1,37 +0,0 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutioncore;
import com.google.auto.value.AutoValue;
/**
* The left, right, bottom, and top boundaries of the visible section on the screen. The boundary
* values are typically within the range -1.0 and 1.0.
*/
@AutoValue
public abstract class ResultGlBoundary {
static ResultGlBoundary create(float left, float right, float bottom, float top) {
return new AutoValue_ResultGlBoundary(left, right, bottom, top);
}
public abstract float left();
public abstract float right();
public abstract float bottom();
public abstract float top();
}

View File

@ -20,6 +20,16 @@ public interface ResultGlRenderer<T extends ImageSolutionResult> {
/** Sets up OpenGL rendering when the surface is created or recreated. */ /** Sets up OpenGL rendering when the surface is created or recreated. */
void setupRendering(); void setupRendering();
/** Renders the solution result. */ /**
void renderResult(T result, ResultGlBoundary boundary); * Renders the solution result.
*
* @param result a solution result object that contains the solution outputs.
* @param projectionMatrix a 4 x 4 column-vector matrix stored in column-major order (see also <a
* href="https://developer.android.com/reference/android/opengl/Matrix">android.opengl.Matrix</a>).
* It is an orthographic projection matrix that maps x and y coordinates in {@code result},
* defined in [0, 1]x[0, 1] spanning the entire input image (with a top-left origin), to fit
* into the {@link SolutionGlSurfaceView} (with a bottom-left origin) that the input image is
* rendered into with potential cropping.
*/
void renderResult(T result, float[] projectionMatrix);
} }
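A sketch of a renderer that consumes the projection matrix described above, drawing the relative keypoints of a FaceDetectionResult (defined elsewhere in this change) as points. The class name, shaders, and point styling are illustrative, not part of the solution API; the repeated-field proto accessors follow standard Java proto conventions:

import android.opengl.GLES20;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.solutioncore.ResultGlRenderer;
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;

/** Draws each detected face's keypoints as red points. */
class FaceKeypointsGlRenderer implements ResultGlRenderer<FaceDetectionResult> {
  private static final String VERTEX_SHADER =
      "uniform mat4 uProjectionMatrix;\n"
          + "attribute vec4 vPosition;\n"
          + "void main() {\n"
          + "  gl_Position = uProjectionMatrix * vPosition;\n"
          + "  gl_PointSize = 8.0;\n"
          + "}";
  private static final String FRAGMENT_SHADER =
      "precision mediump float;\n"
          + "void main() {\n"
          + "  gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
          + "}";

  private int program;
  private int positionHandle;
  private int projectionMatrixHandle;

  @Override
  public void setupRendering() {
    program = GLES20.glCreateProgram();
    GLES20.glAttachShader(program, loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER));
    GLES20.glAttachShader(program, loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER));
    GLES20.glLinkProgram(program);
    positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
    projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
  }

  @Override
  public void renderResult(FaceDetectionResult result, float[] projectionMatrix) {
    if (result == null) {
      return;
    }
    GLES20.glUseProgram(program);
    // The matrix maps the result's [0, 1]x[0, 1] image coordinates into the view,
    // so normalized keypoints can be passed through unchanged.
    GLES20.glUniformMatrix4fv(projectionMatrixHandle, 1, false, projectionMatrix, 0);
    for (Detection detection : result.multiFaceDetections()) {
      int numKeypoints = detection.getLocationData().getRelativeKeypointsCount();
      FloatBuffer vertices =
          ByteBuffer.allocateDirect(numKeypoints * 2 * 4)
              .order(ByteOrder.nativeOrder())
              .asFloatBuffer();
      for (RelativeKeypoint keypoint : detection.getLocationData().getRelativeKeypointsList()) {
        vertices.put(keypoint.getX());
        vertices.put(keypoint.getY());
      }
      vertices.position(0);
      GLES20.glEnableVertexAttribArray(positionHandle);
      GLES20.glVertexAttribPointer(positionHandle, 2, GLES20.GL_FLOAT, false, 0, vertices);
      GLES20.glDrawArrays(GLES20.GL_POINTS, 0, numKeypoints);
      GLES20.glDisableVertexAttribArray(positionHandle);
    }
  }

  private static int loadShader(int type, String shaderCode) {
    int shader = GLES20.glCreateShader(type);
    GLES20.glShaderSource(shader, shaderCode);
    GLES20.glCompileShader(shader);
    return shader;
  }
}

Such a renderer would be attached to a SolutionGlSurfaceView, which supplies the orthographic matrix computed in SolutionGlSurfaceViewRenderer further below.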

View File

@ -73,21 +73,24 @@ public class SolutionBase {
AndroidAssetUtil.getAssetBytes(context.getAssets(), solutionInfo.binaryGraphPath())); AndroidAssetUtil.getAssetBytes(context.getAssets(), solutionInfo.binaryGraphPath()));
} }
solutionGraph.addMultiStreamCallback( solutionGraph.addMultiStreamCallback(
solutionInfo.outputStreamNames(), outputHandler::run, /*observeTimestampBounds=*/ true); solutionInfo.outputStreamNames(),
outputHandler::run,
/*observeTimestampBounds=*/ outputHandler.handleTimestampBoundChanges());
packetCreator = new AndroidPacketCreator(solutionGraph); packetCreator = new AndroidPacketCreator(solutionGraph);
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
throwException("Error occurs when creating the MediaPipe solution graph. ", e); reportError("Error occurs while creating the MediaPipe solution graph.", e);
} }
} }
/** Throws exception with error message. */ /** Reports error with the detailed error message. */
protected void throwException(String message, MediaPipeException e) { protected void reportError(String message, MediaPipeException e) {
String detailedErrorMessage = String.format("%s Error details: %s", message, e.getMessage());
if (errorListener != null) { if (errorListener != null) {
errorListener.onError(message, e); errorListener.onError(detailedErrorMessage, e);
} else { } else {
Log.e(TAG, message, e); Log.e(TAG, detailedErrorMessage, e);
throw e;
} }
throw e;
} }
/** /**
@ -114,7 +117,7 @@ public class SolutionBase {
solutionGraph.startRunningGraph(); solutionGraph.startRunningGraph();
} }
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
throwException("Error occurs when starting the MediaPipe solution graph. ", e); reportError("Error occurs while starting the MediaPipe solution graph.", e);
} }
} }
@ -123,7 +126,7 @@ public class SolutionBase {
try { try {
solutionGraph.waitUntilGraphIdle(); solutionGraph.waitUntilGraphIdle();
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
throwException("Error occurs when waiting until the MediaPipe graph becomes idle. ", e); reportError("Error occurs while waiting until the MediaPipe graph becomes idle.", e);
} }
} }
@ -137,12 +140,12 @@ public class SolutionBase {
// Note: errors during Process are reported at the earliest opportunity, // Note: errors during Process are reported at the earliest opportunity,
// which may be addPacket or waitUntilDone, depending on timing. For consistency, // which may be addPacket or waitUntilDone, depending on timing. For consistency,
// we want to always report them using the same async handler if installed. // we want to always report them using the same async handler if installed.
throwException("Error occurs when closing the Mediapipe solution graph. ", e); reportError("Error occurs while closing the Mediapipe solution graph.", e);
} }
try { try {
solutionGraph.tearDown(); solutionGraph.tearDown();
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
throwException("Error occurs when closing the Mediapipe solution graph. ", e); reportError("Error occurs while closing the Mediapipe solution graph.", e);
} }
} }
} }

View File

@ -16,6 +16,7 @@ package com.google.mediapipe.solutioncore;
import android.graphics.SurfaceTexture; import android.graphics.SurfaceTexture;
import android.opengl.GLES20; import android.opengl.GLES20;
import android.opengl.Matrix;
import com.google.mediapipe.components.GlSurfaceViewRenderer; import com.google.mediapipe.components.GlSurfaceViewRenderer;
import com.google.mediapipe.framework.TextureFrame; import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.glutil.ShaderUtil; import com.google.mediapipe.glutil.ShaderUtil;
@ -91,14 +92,18 @@ public class SolutionGlSurfaceViewRenderer<T extends ImageSolutionResult>
if (nextSolutionResult != null) { if (nextSolutionResult != null) {
solutionResult = nextSolutionResult.getAndSet(null); solutionResult = nextSolutionResult.getAndSet(null);
float[] textureBoundary = calculateTextureBoundary(); float[] textureBoundary = calculateTextureBoundary();
// Scales the values from [0, 1] to [-1, 1]. float[] projectionMatrix = new float[16];
ResultGlBoundary resultGlBoundary = // See {@link ResultGlRenderer#renderResult}.
ResultGlBoundary.create( Matrix.orthoM(
textureBoundary[0] * 2 - 1, projectionMatrix, /* result */
textureBoundary[1] * 2 - 1, 0, /* offset */
textureBoundary[2] * 2 - 1, textureBoundary[0], /* left */
textureBoundary[3] * 2 - 1); textureBoundary[1], /* right */
resultGlRenderer.renderResult(solutionResult, resultGlBoundary); textureBoundary[3], /* bottom */
textureBoundary[2], /* top */
-1, /* near */
1 /* far */);
resultGlRenderer.renderResult(solutionResult, projectionMatrix);
} }
flush(frame); flush(frame);
if (solutionResult != null) { if (solutionResult != null) {

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.solutions.facedetection">
<uses-sdk android:minSdkVersion="21"
android:targetSdkVersion="27" />
</manifest>

View File

@ -0,0 +1,45 @@
# Copyright 2021 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
android_library(
name = "facedetection",
srcs = [
"FaceDetection.java",
"FaceDetectionOptions.java",
"FaceDetectionResult.java",
"FaceKeypoint.java",
],
assets = [
"//mediapipe/modules/face_detection:face_detection_full_range_image.binarypb",
"//mediapipe/modules/face_detection:face_detection_full_range_sparse.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_detection:face_detection_short_range_image.binarypb",
],
assets_dir = "",
javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],
manifest = ":AndroidManifest.xml",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework/formats:detection_java_proto_lite",
"//mediapipe/framework/formats:location_data_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_base",
"//third_party:autovalue",
"@maven//:androidx_annotation_annotation",
"@maven//:com_google_code_findbugs_jsr305",
"@maven//:com_google_guava_guava",
],
)

View File

@ -0,0 +1,130 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.facedetection;
import android.content.Context;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutioncore.ErrorListener;
import com.google.mediapipe.solutioncore.ImageSolutionBase;
import com.google.mediapipe.solutioncore.OutputHandler;
import com.google.mediapipe.solutioncore.ResultListener;
import com.google.mediapipe.solutioncore.SolutionInfo;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
/**
* MediaPipe Face Detection Solution API.
*
* <p>MediaPipe Face Detection processes a {@link TextureFrame} or a {@link Bitmap} and returns the
* {@link FaceDetectionResult} representing each detected face. Please refer to
* https://solutions.mediapipe.dev/face_detection#android-solution-api for usage examples.
*/
public class FaceDetection extends ImageSolutionBase {
private static final String TAG = "FaceDetection";
private static final String SHORT_RANGE_GRAPH_NAME = "face_detection_short_range_image.binarypb";
private static final String FULL_RANGE_GRAPH_NAME = "face_detection_full_range_image.binarypb";
private static final String IMAGE_INPUT_STREAM = "image";
private static final ImmutableList<String> OUTPUT_STREAMS =
ImmutableList.of("detections", "throttled_image");
private static final int DETECTIONS_INDEX = 0;
private static final int INPUT_IMAGE_INDEX = 1;
private final OutputHandler<FaceDetectionResult> outputHandler;
/**
* Initializes MediaPipe Face Detection solution.
*
* @param context an Android {@link Context}.
* @param options the configuration options defined in {@link FaceDetectionOptions}.
*/
public FaceDetection(Context context, FaceDetectionOptions options) {
outputHandler = new OutputHandler<>();
outputHandler.setOutputConverter(
packets -> {
FaceDetectionResult.Builder faceMeshResultBuilder = FaceDetectionResult.builder();
try {
faceMeshResultBuilder.setMultiFaceDetections(
getProtoVector(packets.get(DETECTIONS_INDEX), Detection.parser()));
} catch (MediaPipeException e) {
reportError("Error occurs while getting MediaPipe face detection results.", e);
}
return faceMeshResultBuilder
.setImagePacket(packets.get(INPUT_IMAGE_INDEX))
.setTimestamp(
staticImageMode ? Long.MIN_VALUE : packets.get(INPUT_IMAGE_INDEX).getTimestamp())
.build();
});
SolutionInfo solutionInfo =
SolutionInfo.builder()
.setBinaryGraphPath(
options.modelSelection() == 0 ? SHORT_RANGE_GRAPH_NAME : FULL_RANGE_GRAPH_NAME)
.setImageInputStreamName(IMAGE_INPUT_STREAM)
.setOutputStreamNames(OUTPUT_STREAMS)
.setStaticImageMode(options.staticImageMode())
.build();
initialize(context, solutionInfo, outputHandler);
Map<String, Packet> emptyInputSidePackets = new HashMap<>();
start(emptyInputSidePackets);
}
/**
* Sets a callback to be invoked when a {@link FaceDetectionResult} becomes available.
*
* @param listener the {@link ResultListener} callback.
*/
public void setResultListener(ResultListener<FaceDetectionResult> listener) {
this.outputHandler.setResultListener(listener);
}
/**
* Sets a callback to be invoked when the Face Detection solution throws errors.
*
* @param listener the {@link ErrorListener} callback.
*/
public void setErrorListener(@Nullable ErrorListener listener) {
this.outputHandler.setErrorListener(listener);
this.errorListener = listener;
}
/**
* Gets a specific face keypoint by face index and face keypoint type.
*
* @param result the returned {@link FaceDetectionResult} object.
* @param faceIndex the face index. A smaller index maps to a detected face with a higher
* confidence score.
* @param faceKeypointType the face keypoint type defined in {@link FaceKeypoint}.
*/
public static RelativeKeypoint getFaceKeypoint(
FaceDetectionResult result,
int faceIndex,
@FaceKeypoint.FaceKeypointType int faceKeypointType) {
if (result == null
|| faceIndex >= result.multiFaceDetections().size()
|| faceKeypointType >= FaceKeypoint.NUM_KEY_POINTS) {
return RelativeKeypoint.getDefaultInstance();
}
Detection detection = result.multiFaceDetections().get(faceIndex);
float x = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getX();
float y = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getY();
return RelativeKeypoint.newBuilder().setX(x).setY(y).build();
}
}
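An end-to-end usage sketch of the new Face Detection Solution API in static image mode; the FaceDetectionSetup wrapper and logging are hypothetical application code, and the listener lambdas assume the single-method ResultListener/ErrorListener interfaces used throughout solutioncore:

import android.content.Context;
import android.graphics.Bitmap;
import android.util.Log;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.solutions.facedetection.FaceDetection;
import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;

final class FaceDetectionSetup {
  private static final String TAG = "FaceDetectionSetup";

  static FaceDetection createForStaticImages(Context context) {
    FaceDetection faceDetection =
        new FaceDetection(
            context,
            FaceDetectionOptions.builder()
                .setStaticImageMode(true)
                .setModelSelection(0) // Short-range model, best for faces within ~2 meters.
                .build());
    faceDetection.setResultListener(
        result -> {
          if (result.multiFaceDetections().isEmpty()) {
            return;
          }
          // Read the nose tip of the most confident face (index 0).
          RelativeKeypoint noseTip =
              FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
          Log.i(TAG, "Nose tip (normalized): " + noseTip.getX() + ", " + noseTip.getY());
        });
    faceDetection.setErrorListener(
        (message, e) -> Log.e(TAG, "Face detection error: " + message, e));
    return faceDetection;
  }

  static void detect(FaceDetection faceDetection, Bitmap bitmap) {
    // In static image mode no timestamp is needed; for video, send a TextureFrame
    // with an explicit timestamp instead.
    faceDetection.send(bitmap);
  }
}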

View File

@ -0,0 +1,61 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.facedetection;
import com.google.auto.value.AutoValue;
/**
* MediaPipe Face Detection solution-specific options.
*
* <p>staticImageMode: Whether to treat the input images as a batch of static and possibly unrelated
* images, or a video stream. Defaults to false. See details in
* https://solutions.mediapipe.dev/face_detection#static_image_mode.
*
* <p>minDetectionConfidence: Minimum confidence value ([0.0, 1.0]) for face detection to be
* considered successful. See details in
* https://solutions.mediapipe.dev/face_detection#min_detection_confidence.
*
* <p>modelSelection: 0 or 1. 0 to select a short-range model that works best for faces within 2
* meters from the camera, and 1 for a full-range model best for faces within 5 meters. See details
* in https://solutions.mediapipe.dev/face_detection#model_selection.
*/
@AutoValue
public abstract class FaceDetectionOptions {
public abstract boolean staticImageMode();
public abstract int modelSelection();
public abstract float minDetectionConfidence();
public static Builder builder() {
return new AutoValue_FaceDetectionOptions.Builder().withDefaultValues();
}
/** Builder for {@link FaceDetectionOptions}. */
@AutoValue.Builder
public abstract static class Builder {
public Builder withDefaultValues() {
return setStaticImageMode(false).setModelSelection(0).setMinDetectionConfidence(0.5f);
}
public abstract Builder setStaticImageMode(boolean value);
public abstract Builder setModelSelection(int value);
public abstract Builder setMinDetectionConfidence(float value);
public abstract FaceDetectionOptions build();
}
}
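The builder and setters used below are the ones defined above; a short sketch of a non-default configuration for a live camera feed, with illustrative values:

import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;

final class FaceDetectionOptionsExample {
  // Full-range model (modelSelection=1) for faces up to ~5 meters away, with a
  // slightly stricter detection threshold than the 0.5 default.
  static FaceDetectionOptions liveFullRangeOptions() {
    return FaceDetectionOptions.builder()
        .setStaticImageMode(false)
        .setModelSelection(1)
        .setMinDetectionConfidence(0.6f)
        .build();
  }
}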

View File

@ -0,0 +1,65 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.facedetection;
import android.graphics.Bitmap;
import com.google.auto.value.AutoBuilder;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.solutioncore.ImageSolutionResult;
import java.util.List;
/**
* FaceDetectionResult contains the detected faces, and the input {@link Bitmap} or {@link
* TextureFrame}. If not in static image mode, the timestamp field will be set to the timestamp of
* the corresponding input image.
*/
public class FaceDetectionResult extends ImageSolutionResult {
private final ImmutableList<Detection> multiFaceDetections;
FaceDetectionResult(
ImmutableList<Detection> multiFaceDetections, Packet imagePacket, long timestamp) {
this.multiFaceDetections = multiFaceDetections;
this.timestamp = timestamp;
this.imagePacket = imagePacket;
}
// Collection of detected faces, where each face is represented as a detection proto message that
// contains a bounding box and 6 {@link FaceKeypoint}s. The bounding box is composed of xmin and
// width (both normalized to [0.0, 1.0] by the image width) and ymin and height (both normalized
// to [0.0, 1.0] by the image height). Each keypoint is composed of x and y, which are normalized
// to [0.0, 1.0] by the image width and height respectively.
public ImmutableList<Detection> multiFaceDetections() {
return multiFaceDetections;
}
public static Builder builder() {
return new AutoBuilder_FaceDetectionResult_Builder();
}
/** Builder for {@link FaceDetectionResult}. */
@AutoBuilder
public abstract static class Builder {
abstract Builder setMultiFaceDetections(List<Detection> value);
abstract Builder setTimestamp(long value);
abstract Builder setImagePacket(Packet value);
abstract FaceDetectionResult build();
}
}
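A sketch of reading the structure described in the comment above; the RelativeBoundingBox accessor names are assumed to follow the standard Java proto conventions for location_data.proto:

import android.util.Log;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData;
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;

final class FaceDetectionResultPrinter {
  private static final String TAG = "FaceDetectionResult";

  /** Logs each detected face's normalized bounding box. */
  static void print(FaceDetectionResult result) {
    for (int i = 0; i < result.multiFaceDetections().size(); ++i) {
      Detection detection = result.multiFaceDetections().get(i);
      LocationData.RelativeBoundingBox box =
          detection.getLocationData().getRelativeBoundingBox();
      Log.i(
          TAG,
          String.format(
              "Face %d: xmin=%.3f ymin=%.3f width=%.3f height=%.3f",
              i, box.getXmin(), box.getYmin(), box.getWidth(), box.getHeight()));
    }
  }
}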

View File

@ -0,0 +1,42 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.solutions.facedetection;
import androidx.annotation.IntDef;
/** The 6 face keypoints. */
public final class FaceKeypoint {
public static final int NUM_KEY_POINTS = 6;
public static final int RIGHT_EYE = 0;
public static final int LEFT_EYE = 1;
public static final int NOSE_TIP = 2;
public static final int MOUTH_CENTER = 3;
public static final int RIGHT_EAR_TRAGION = 4;
public static final int LEFT_EAR_TRAGION = 5;
/** Represents a face keypoint type. */
@IntDef({
RIGHT_EYE,
LEFT_EYE,
NOSE_TIP,
MOUTH_CENTER,
RIGHT_EAR_TRAGION,
LEFT_EAR_TRAGION,
})
public @interface FaceKeypointType {}
private FaceKeypoint() {}
}

View File

@ -25,6 +25,7 @@ android_library(
assets = [ assets = [
"//mediapipe/modules/face_detection:face_detection_short_range.tflite", "//mediapipe/modules/face_detection:face_detection_short_range.tflite",
"//mediapipe/modules/face_landmark:face_landmark.tflite", "//mediapipe/modules/face_landmark:face_landmark.tflite",
"//mediapipe/modules/face_landmark:face_landmark_with_attention.tflite",
"//mediapipe/modules/face_landmark:face_landmark_front_cpu_image.binarypb", "//mediapipe/modules/face_landmark:face_landmark_front_cpu_image.binarypb",
"//mediapipe/modules/face_landmark:face_landmark_front_gpu_image.binarypb", "//mediapipe/modules/face_landmark:face_landmark_front_gpu_image.binarypb",
], ],

View File

@ -29,41 +29,46 @@ import java.util.Map;
import javax.annotation.Nullable; import javax.annotation.Nullable;
/** /**
* MediaPipe FaceMesh Solution API. * MediaPipe Face Mesh Solution API.
* *
* <p>MediaPipe FaceMesh processes a {@link TextureFrame} or a {@link Bitmap} and returns the face * <p>MediaPipe Face Mesh processes a {@link TextureFrame} or a {@link Bitmap} and returns the face
* landmarks of each detected face. Please refer to * landmarks of each detected face. Please refer to
* https://solutions.mediapipe.dev/face_mesh#android-solution-api for usage examples. * https://solutions.mediapipe.dev/face_mesh#android-solution-api for usage examples.
*/ */
public class FaceMesh extends ImageSolutionBase { public class FaceMesh extends ImageSolutionBase {
private static final String TAG = "FaceMesh"; private static final String TAG = "FaceMesh";
public static final int FACEMESH_NUM_LANDMARKS = 468;
public static final int FACEMESH_NUM_LANDMARKS_WITH_IRISES = 478;
private static final String NUM_FACES = "num_faces"; private static final String NUM_FACES = "num_faces";
private static final String WITH_ATTENTION = "with_attention";
private static final String USE_PREV_LANDMARKS = "use_prev_landmarks";
private static final String GPU_GRAPH_NAME = "face_landmark_front_gpu_image.binarypb"; private static final String GPU_GRAPH_NAME = "face_landmark_front_gpu_image.binarypb";
private static final String CPU_GRAPH_NAME = "face_landmark_front_cpu_image.binarypb"; private static final String CPU_GRAPH_NAME = "face_landmark_front_cpu_image.binarypb";
private static final String IMAGE_INPUT_STREAM = "image"; private static final String IMAGE_INPUT_STREAM = "image";
private static final ImmutableList<String> OUTPUT_STREAMS = private static final ImmutableList<String> OUTPUT_STREAMS =
ImmutableList.of("multi_face_landmarks", "image"); ImmutableList.of("multi_face_landmarks", "throttled_image");
private static final int LANDMARKS_INDEX = 0; private static final int LANDMARKS_INDEX = 0;
private static final int INPUT_IMAGE_INDEX = 1; private static final int INPUT_IMAGE_INDEX = 1;
private final OutputHandler<FaceMeshResult> graphOutputHandler; private final OutputHandler<FaceMeshResult> outputHandler;
/** /**
* Initializes MediaPipe FaceMesh solution. * Initializes MediaPipe Face Mesh solution.
* *
* @param context an Android {@link Context}. * @param context an Android {@link Context}.
* @param options the configuration options defined in {@link FaceMeshOptions}. * @param options the configuration options defined in {@link FaceMeshOptions}.
*/ */
public FaceMesh(Context context, FaceMeshOptions options) { public FaceMesh(Context context, FaceMeshOptions options) {
graphOutputHandler = new OutputHandler<>(); outputHandler = new OutputHandler<>();
graphOutputHandler.setOutputConverter( outputHandler.setOutputConverter(
packets -> { packets -> {
FaceMeshResult.Builder faceMeshResultBuilder = FaceMeshResult.builder(); FaceMeshResult.Builder faceMeshResultBuilder = FaceMeshResult.builder();
try { try {
faceMeshResultBuilder.setMultiFaceLandmarks( faceMeshResultBuilder.setMultiFaceLandmarks(
getProtoVector(packets.get(LANDMARKS_INDEX), NormalizedLandmarkList.parser())); getProtoVector(packets.get(LANDMARKS_INDEX), NormalizedLandmarkList.parser()));
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
throwException("Error occurs when getting MediaPipe facemesh landmarks. ", e); reportError("Error occurs when getting MediaPipe facemesh landmarks.", e);
} }
return faceMeshResultBuilder return faceMeshResultBuilder
.setImagePacket(packets.get(INPUT_IMAGE_INDEX)) .setImagePacket(packets.get(INPUT_IMAGE_INDEX))
@ -77,31 +82,33 @@ public class FaceMesh extends ImageSolutionBase {
.setBinaryGraphPath(options.runOnGpu() ? GPU_GRAPH_NAME : CPU_GRAPH_NAME) .setBinaryGraphPath(options.runOnGpu() ? GPU_GRAPH_NAME : CPU_GRAPH_NAME)
.setImageInputStreamName(IMAGE_INPUT_STREAM) .setImageInputStreamName(IMAGE_INPUT_STREAM)
.setOutputStreamNames(OUTPUT_STREAMS) .setOutputStreamNames(OUTPUT_STREAMS)
.setStaticImageMode(options.mode() == FaceMeshOptions.STATIC_IMAGE_MODE) .setStaticImageMode(options.staticImageMode())
.build(); .build();
initialize(context, solutionInfo, graphOutputHandler); initialize(context, solutionInfo, outputHandler);
Map<String, Packet> inputSidePackets = new HashMap<>(); Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put(NUM_FACES, packetCreator.createInt32(options.maxNumFaces())); inputSidePackets.put(NUM_FACES, packetCreator.createInt32(options.maxNumFaces()));
inputSidePackets.put(WITH_ATTENTION, packetCreator.createBool(options.refineLandmarks()));
inputSidePackets.put(USE_PREV_LANDMARKS, packetCreator.createBool(!options.staticImageMode()));
start(inputSidePackets); start(inputSidePackets);
} }
/** /**
* Sets a callback to be invoked when the FaceMeshResults become available. * Sets a callback to be invoked when a {@link FaceMeshResult} becomes available.
* *
* @param listener the {@link ResultListener} callback. * @param listener the {@link ResultListener} callback.
*/ */
public void setResultListener(ResultListener<FaceMeshResult> listener) { public void setResultListener(ResultListener<FaceMeshResult> listener) {
this.graphOutputHandler.setResultListener(listener); this.outputHandler.setResultListener(listener);
} }
/** /**
* Sets a callback to be invoked when the FaceMesh solution throws errors. * Sets a callback to be invoked when the Face Mesh solution throws errors.
* *
* @param listener the {@link ErrorListener} callback. * @param listener the {@link ErrorListener} callback.
*/ */
public void setErrorListener(@Nullable ErrorListener listener) { public void setErrorListener(@Nullable ErrorListener listener) {
this.graphOutputHandler.setErrorListener(listener); this.outputHandler.setErrorListener(listener);
this.errorListener = listener; this.errorListener = listener;
} }
} }
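A hedged sketch of configuring the updated Face Mesh solution with iris refinement. The com.google.mediapipe.solutions.facemesh package name and the FaceMeshOptions builder setters are assumptions inferred from the option getters used above (staticImageMode, maxNumFaces, refineLandmarks, runOnGpu); only the FaceMesh constructor and the landmark-count constants are confirmed by this change:

import android.content.Context;
import com.google.mediapipe.solutions.facemesh.FaceMesh;
import com.google.mediapipe.solutions.facemesh.FaceMeshOptions;

final class FaceMeshSetup {
  static FaceMesh createWithIrisLandmarks(Context context) {
    FaceMeshOptions options =
        FaceMeshOptions.builder() // Assumed builder entry point.
            .setStaticImageMode(false)
            .setMaxNumFaces(1)
            .setRefineLandmarks(true) // Selects the attention model, adding iris landmarks.
            .setRunOnGpu(true)
            .build();
    // With landmark refinement enabled, each face is expected to carry
    // FaceMesh.FACEMESH_NUM_LANDMARKS_WITH_IRISES (478) landmarks rather than 468.
    return new FaceMesh(context, options);
  }
}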

Some files were not shown because too many files have changed in this diff.