Project import generated by Copybara.

GitOrigin-RevId: d4a11282d20fe4d2e137f9032cf349750030dcb9
MediaPipe Team 2021-11-03 14:21:54 -07:00 committed by jqtang
parent 1faeaae7e5
commit d4bb35fe5a
72 changed files with 1089 additions and 336 deletions

View File

@ -257,8 +257,15 @@ glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
return;
}
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
faceDetectionResult
.multiFaceDetections()
.get(0)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
@ -297,10 +304,17 @@ FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
faceDetection.setResultListener(
faceDetectionResult -> {
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
return;
}
int width = faceDetectionResult.inputBitmap().getWidth();
int height = faceDetectionResult.inputBitmap().getHeight();
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
faceDetectionResult
.multiFaceDetections()
.get(0)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
@ -334,9 +348,9 @@ ActivityResultLauncher<Intent> imageGetter =
}
}
});
Intent gallery = new Intent(
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```
#### Video Input
@ -368,8 +382,15 @@ glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
faceDetectionResult -> {
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
return;
}
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
faceDetectionResult
.multiFaceDetections()
.get(0)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
Log.i(
TAG,
String.format(
@ -398,9 +419,9 @@ ActivityResultLauncher<Intent> videoGetter =
}
}
});
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```
## Example Apps

View File

@ -612,9 +612,9 @@ ActivityResultLauncher<Intent> imageGetter =
}
}
});
Intent gallery = new Intent(
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```
#### Video Input
@ -678,9 +678,9 @@ ActivityResultLauncher<Intent> videoGetter =
}
}
});
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```
## Example Apps

View File

@ -91,8 +91,10 @@ To detect initial hand locations, we designed a
mobile real-time uses in a manner similar to the face detection model in
[MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
task: our
[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite)
has to work across a variety of hand sizes with a large scale span (~20x)
[lite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite)
and
[full model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite)
have to work across a variety of hand sizes with a large scale span (~20x)
relative to the image frame and be able to detect occluded and self-occluded
hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
region, the lack of such features in hands makes it comparatively difficult to
@ -195,6 +197,17 @@ of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
and the smaller the value the closer the landmark is to the camera. The
magnitude of `z` uses roughly the same scale as `x`.
#### multi_hand_world_landmarks
Collection of detected/tracked hands, where each hand is represented as a list
of 21 hand landmarks in world coordinates. Each landmark consists of the
following (a short sketch follows this list):
* `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
hand's approximate geometric center.
* `visibility`: Identical to that defined in the corresponding
[multi_hand_landmarks](#multi_hand_landmarks).
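Because world landmarks are metric 3D points, ordinary geometry can be applied to them directly. Below is a minimal sketch, not part of the solution API, assuming a single hand's world landmarks are available as a `mediapipe::LandmarkList` proto and using the conventional landmark indices (wrist = 0, index fingertip = 8):
```cpp
#include <cmath>

#include "mediapipe/framework/formats/landmark.pb.h"

// Euclidean distance in meters between two of the 21 world landmarks above.
float WorldLandmarkDistanceMeters(
    const mediapipe::LandmarkList& hand_world_landmarks, int a, int b) {
  const auto& la = hand_world_landmarks.landmark(a);
  const auto& lb = hand_world_landmarks.landmark(b);
  const float dx = la.x() - lb.x();
  const float dy = la.y() - lb.y();
  const float dz = la.z() - lb.z();
  return std::sqrt(dx * dx + dy * dy + dz * dz);
}

// Example: distance from the wrist (0) to the index fingertip (8).
// float d = WorldLandmarkDistanceMeters(hand_world_landmarks, 0, 8);
```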
#### multi_handedness
Collection of handedness of the detected/tracked hands (i.e. is it a left or
@ -262,6 +275,12 @@ with mp_hands.Hands(
mp_drawing_styles.get_default_hand_connections_style())
cv2.imwrite(
'/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
# Draw hand world landmarks.
if not results.multi_hand_world_landmarks:
continue
for hand_world_landmarks in results.multi_hand_world_landmarks:
mp_drawing.plot_landmarks(
hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)
# For webcam input:
cap = cv2.VideoCapture(0)
@ -400,7 +419,7 @@ Supported configuration options:
HandsOptions handsOptions =
HandsOptions.builder()
.setStaticImageMode(false)
.setMaxNumHands(1)
.setMaxNumHands(2)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
@ -423,8 +442,11 @@ glSurfaceView.setRenderInputImage(true);
hands.setResultListener(
handsResult -> {
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
handsResult, 0, HandLandmark.WRIST);
if (result.multiHandLandmarks().isEmpty()) {
return;
}
NormalizedLandmark wristLandmark =
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
@ -453,7 +475,7 @@ glSurfaceView.post(
HandsOptions handsOptions =
HandsOptions.builder()
.setStaticImageMode(true)
.setMaxNumHands(1)
.setMaxNumHands(2)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
@ -464,10 +486,13 @@ Hands hands = new Hands(this, handsOptions);
HandsResultImageView imageView = new HandsResultImageView(this);
hands.setResultListener(
handsResult -> {
if (result.multiHandLandmarks().isEmpty()) {
return;
}
int width = handsResult.inputBitmap().getWidth();
int height = handsResult.inputBitmap().getHeight();
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
handsResult, 0, HandLandmark.WRIST);
NormalizedLandmark wristLandmark =
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
@ -501,9 +526,9 @@ ActivityResultLauncher<Intent> imageGetter =
}
}
});
Intent gallery = new Intent(
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
```
#### Video Input
@ -513,7 +538,7 @@ imageGetter.launch(gallery);
HandsOptions handsOptions =
HandsOptions.builder()
.setStaticImageMode(false)
.setMaxNumHands(1)
.setMaxNumHands(2)
.setRunOnGpu(true).build();
Hands hands = new Hands(this, handsOptions);
hands.setErrorListener(
@ -536,8 +561,11 @@ glSurfaceView.setRenderInputImage(true);
hands.setResultListener(
handsResult -> {
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
handsResult, 0, HandLandmark.WRIST);
if (result.multiHandLandmarks().isEmpty()) {
return;
}
NormalizedLandmark wristLandmark =
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
@ -566,9 +594,9 @@ ActivityResultLauncher<Intent> videoGetter =
}
}
});
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
```
## Example Apps

View File

@ -159,6 +159,11 @@ images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Defaults to
`true`.
#### refine_face_landmarks
Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises. Defaults to `false`.
#### min_detection_confidence
Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@ -241,6 +246,7 @@ Supported configuration options:
* [smooth_landmarks](#smooth_landmarks)
* [enable_segmentation](#enable_segmentation)
* [smooth_segmentation](#smooth_segmentation)
* [refine_face_landmarks](#refine_face_landmarks)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)
@ -256,7 +262,8 @@ IMAGE_FILES = []
with mp_holistic.Holistic(
static_image_mode=True,
model_complexity=2,
enable_segmentation=True) as holistic:
enable_segmentation=True,
refine_face_landmarks=True) as holistic:
for idx, file in enumerate(IMAGE_FILES):
image = cv2.imread(file)
image_height, image_width, _ = image.shape
@ -350,6 +357,7 @@ Supported configuration options:
* [smoothLandmarks](#smooth_landmarks)
* [enableSegmentation](#enable_segmentation)
* [smoothSegmentation](#smooth_segmentation)
* [refineFaceLandmarks](#refineFaceLandmarks)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)
@ -421,6 +429,7 @@ holistic.setOptions({
smoothLandmarks: true,
enableSegmentation: true,
smoothSegmentation: true,
refineFaceLandmarks: true,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});

View File

@ -55,15 +55,14 @@ one over the other.
### [Hands](https://google.github.io/mediapipe/solutions/hands)
* Palm detection model:
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
[TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite),
[TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
[TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
[TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
[TFLite model (sparse)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_sparse.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
* [Model card](https://mediapipe.page.link/handmc),
[Model card (sparse)](https://mediapipe.page.link/handmc-sparse)
* [Model card](https://mediapipe.page.link/handmc)
### [Pose](https://google.github.io/mediapipe/solutions/pose)

View File

@ -60,7 +60,10 @@ class PacketClonerCalculator : public CalculatorBase {
const auto calculator_options =
cc->Options<mediapipe::PacketClonerCalculatorOptions>();
output_only_when_all_inputs_received_ =
calculator_options.output_only_when_all_inputs_received();
calculator_options.output_only_when_all_inputs_received() ||
calculator_options.output_packets_only_when_all_inputs_received();
output_empty_packets_before_all_inputs_received_ =
calculator_options.output_packets_only_when_all_inputs_received();
// Parse input streams.
tick_signal_index_ = cc->Inputs().NumEntries() - 1;
@ -88,6 +91,9 @@ class PacketClonerCalculator : public CalculatorBase {
// Return if one of the inputs is empty.
for (int i = 0; i < tick_signal_index_; ++i) {
if (current_[i].IsEmpty()) {
if (output_empty_packets_before_all_inputs_received_) {
SetAllNextTimestampBounds(cc);
}
return absl::OkStatus();
}
}
@ -107,9 +113,17 @@ class PacketClonerCalculator : public CalculatorBase {
}
private:
void SetAllNextTimestampBounds(CalculatorContext* cc) {
for (int j = 0; j < tick_signal_index_; ++j) {
cc->Outputs().Index(j).SetNextTimestampBound(
cc->InputTimestamp().NextAllowedInStream());
}
}
std::vector<Packet> current_;
int tick_signal_index_;
bool output_only_when_all_inputs_received_;
bool output_empty_packets_before_all_inputs_received_;
};
REGISTER_CALCULATOR(PacketClonerCalculator);

View File

@ -28,4 +28,9 @@ message PacketClonerCalculatorOptions {
// When true, this calculator will drop received TICK packets if any input
// stream hasn't received a packet yet.
optional bool output_only_when_all_inputs_received = 1 [default = false];
// Similar to the above, but also transmits empty packets for all streams
// before all inputs are received.
optional bool output_packets_only_when_all_inputs_received = 2
[default = false];
}
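To make the new flag concrete, the sketch below builds a node configuration that enables it, using the ParseTextProtoOrDie pattern that the framework tests elsewhere in this change also use. The stream names are invented for illustration, and the snippet assumes the usual `ext` extension field that this options proto declares on `CalculatorOptions` (not visible in this hunk) and that the generated options proto is linked into the binary.
```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Hypothetical node config: the last input stream acts as the TICK signal.
mediapipe::CalculatorGraphConfig::Node MakePacketClonerNode() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig::Node>(
      R"pb(
        calculator: "PacketClonerCalculator"
        input_stream: "value_to_clone"
        input_stream: "tick"
        output_stream: "cloned_value"
        options {
          [mediapipe.PacketClonerCalculatorOptions.ext] {
            output_packets_only_when_all_inputs_received: true
          }
        }
      )pb");
}
```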

View File

@ -32,9 +32,9 @@ public class FaceDetectionResultImageView extends AppCompatImageView {
private static final String TAG = "FaceDetectionResultImageView";
private static final int KEYPOINT_COLOR = Color.RED;
private static final int KEYPOINT_RADIUS = 15;
private static final int KEYPOINT_RADIUS = 8; // Pixels
private static final int BBOX_COLOR = Color.GREEN;
private static final int BBOX_THICKNESS = 10;
private static final int BBOX_THICKNESS = 5; // Pixels
private Bitmap latest;
public FaceDetectionResultImageView(Context context) {

View File

@ -28,7 +28,6 @@ import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import androidx.exifinterface.media.ExifInterface;
// ContentResolver dependency
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
import com.google.mediapipe.solutioncore.VideoInput;
@ -36,6 +35,7 @@ import com.google.mediapipe.solutions.facedetection.FaceDetection;
import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import java.io.IOException;
import java.io.InputStream;
@ -175,9 +175,9 @@ public class MainActivity extends AppCompatActivity {
setupStaticImageModePipeline();
}
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
});
imageView = new FaceDetectionResultImageView(this);
}
@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.VIDEO);
// Reads video from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
});
}
@ -334,8 +334,15 @@ public class MainActivity extends AppCompatActivity {
private void logNoseTipKeypoint(
FaceDetectionResult result, int faceIndex, boolean showPixelValues) {
if (result.multiFaceDetections().isEmpty()) {
return;
}
RelativeKeypoint noseTip =
FaceDetection.getFaceKeypoint(result, faceIndex, FaceKeypoint.NOSE_TIP);
result
.multiFaceDetections()
.get(faceIndex)
.getLocationData()
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
if (showPixelValues) {
int width = result.inputBitmap().getWidth();

View File

@ -34,19 +34,19 @@ public class FaceMeshResultImageView extends AppCompatImageView {
private static final String TAG = "FaceMeshResultImageView";
private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
private static final int TESSELATION_THICKNESS = 5;
private static final int TESSELATION_THICKNESS = 3; // Pixels
private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
private static final int RIGHT_EYE_THICKNESS = 8;
private static final int RIGHT_EYE_THICKNESS = 5; // Pixels
private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
private static final int RIGHT_EYEBROW_THICKNESS = 8;
private static final int RIGHT_EYEBROW_THICKNESS = 5; // Pixels
private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
private static final int LEFT_EYE_THICKNESS = 8;
private static final int LEFT_EYE_THICKNESS = 5; // Pixels
private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
private static final int LEFT_EYEBROW_THICKNESS = 8;
private static final int LEFT_EYEBROW_THICKNESS = 5; // Pixels
private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
private static final int FACE_OVAL_THICKNESS = 8;
private static final int FACE_OVAL_THICKNESS = 5; // Pixels
private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
private static final int LIPS_THICKNESS = 8;
private static final int LIPS_THICKNESS = 5; // Pixels
private Bitmap latest;
public FaceMeshResultImageView(Context context) {

View File

@ -176,9 +176,9 @@ public class MainActivity extends AppCompatActivity {
setupStaticImageModePipeline();
}
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
});
imageView = new FaceMeshResultImageView(this);
}
@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.VIDEO);
// Reads video from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
});
}

View File

@ -28,7 +28,16 @@ import java.util.List;
public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
private static final String TAG = "HandsResultGlRenderer";
private static final float CONNECTION_THICKNESS = 20.0f;
private static final float[] LEFT_HAND_CONNECTION_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
private static final float[] RIGHT_HAND_CONNECTION_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
private static final float CONNECTION_THICKNESS = 25.0f;
private static final float[] LEFT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
private static final float[] RIGHT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
private static final float HOLLOW_CIRCLE_RADIUS = 0.01f;
private static final float[] LEFT_HAND_LANDMARK_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
private static final float[] RIGHT_HAND_LANDMARK_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
private static final float LANDMARK_RADIUS = 0.008f;
private static final int NUM_SEGMENTS = 120;
private static final String VERTEX_SHADER =
"uniform mat4 uProjectionMatrix;\n"
+ "attribute vec4 vPosition;\n"
@ -37,12 +46,14 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
+ "}";
private static final String FRAGMENT_SHADER =
"precision mediump float;\n"
+ "uniform vec4 uColor;\n"
+ "void main() {\n"
+ " gl_FragColor = vec4(0, 1, 0, 1);\n"
+ " gl_FragColor = uColor;\n"
+ "}";
private int program;
private int positionHandle;
private int projectionMatrixHandle;
private int colorHandle;
private int loadShader(int type, String shaderCode) {
int shader = GLES20.glCreateShader(type);
@ -61,6 +72,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
GLES20.glLinkProgram(program);
positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
colorHandle = GLES20.glGetUniformLocation(program, "uColor");
}
@Override
@ -74,7 +86,22 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
int numHands = result.multiHandLandmarks().size();
for (int i = 0; i < numHands; ++i) {
drawLandmarks(result.multiHandLandmarks().get(i).getLandmarkList());
boolean isLeftHand = result.multiHandedness().get(i).getLabel().equals("Left");
drawConnections(
result.multiHandLandmarks().get(i).getLandmarkList(),
isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
for (NormalizedLandmark landmark : result.multiHandLandmarks().get(i).getLandmarkList()) {
// Draws the landmark.
drawCircle(
landmark.getX(),
landmark.getY(),
isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
// Draws a hollow circle around the landmark.
drawHollowCircle(
landmark.getX(),
landmark.getY(),
isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
}
}
}
@ -87,7 +114,8 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
GLES20.glDeleteProgram(program);
}
private void drawLandmarks(List<NormalizedLandmark> handLandmarkList) {
private void drawConnections(List<NormalizedLandmark> handLandmarkList, float[] colorArray) {
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
NormalizedLandmark start = handLandmarkList.get(c.start());
NormalizedLandmark end = handLandmarkList.get(c.end());
@ -103,4 +131,51 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
}
}
private void drawCircle(float x, float y, float[] colorArray) {
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
int vertexCount = NUM_SEGMENTS + 2;
float[] vertices = new float[vertexCount * 3];
vertices[0] = x;
vertices[1] = y;
vertices[2] = 0;
for (int i = 1; i < vertexCount; i++) {
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
int currentIndex = 3 * i;
vertices[currentIndex] = x + (float) (LANDMARK_RADIUS * Math.cos(angle));
vertices[currentIndex + 1] = y + (float) (LANDMARK_RADIUS * Math.sin(angle));
vertices[currentIndex + 2] = 0;
}
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertices.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(vertices);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_FAN, 0, vertexCount);
}
private void drawHollowCircle(float x, float y, float[] colorArray) {
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
int vertexCount = NUM_SEGMENTS + 1;
float[] vertices = new float[vertexCount * 3];
for (int i = 0; i < vertexCount; i++) {
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
int currentIndex = 3 * i;
vertices[currentIndex] = x + (float) (HOLLOW_CIRCLE_RADIUS * Math.cos(angle));
vertices[currentIndex + 1] = y + (float) (HOLLOW_CIRCLE_RADIUS * Math.sin(angle));
vertices[currentIndex + 2] = 0;
}
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertices.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(vertices);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_LINE_STRIP, 0, vertexCount);
}
}

View File

@ -31,10 +31,15 @@ import java.util.List;
public class HandsResultImageView extends AppCompatImageView {
private static final String TAG = "HandsResultImageView";
private static final int LANDMARK_COLOR = Color.RED;
private static final int LANDMARK_RADIUS = 15;
private static final int CONNECTION_COLOR = Color.GREEN;
private static final int CONNECTION_THICKNESS = 10;
private static final int LEFT_HAND_CONNECTION_COLOR = Color.parseColor("#30FF30");
private static final int RIGHT_HAND_CONNECTION_COLOR = Color.parseColor("#FF3030");
private static final int CONNECTION_THICKNESS = 8; // Pixels
private static final int LEFT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#30FF30");
private static final int RIGHT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#FF3030");
private static final int HOLLOW_CIRCLE_WIDTH = 5; // Pixels
private static final int LEFT_HAND_LANDMARK_COLOR = Color.parseColor("#FF3030");
private static final int RIGHT_HAND_LANDMARK_COLOR = Color.parseColor("#30FF30");
private static final int LANDMARK_RADIUS = 10; // Pixels
private Bitmap latest;
public HandsResultImageView(Context context) {
@ -62,7 +67,11 @@ public class HandsResultImageView extends AppCompatImageView {
int numHands = result.multiHandLandmarks().size();
for (int i = 0; i < numHands; ++i) {
drawLandmarksOnCanvas(
result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
result.multiHandLandmarks().get(i).getLandmarkList(),
result.multiHandedness().get(i).getLabel().equals("Left"),
canvas,
width,
height);
}
}
@ -75,11 +84,16 @@ public class HandsResultImageView extends AppCompatImageView {
}
private void drawLandmarksOnCanvas(
List<NormalizedLandmark> handLandmarkList, Canvas canvas, int width, int height) {
List<NormalizedLandmark> handLandmarkList,
boolean isLeftHand,
Canvas canvas,
int width,
int height) {
// Draw connections.
for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
Paint connectionPaint = new Paint();
connectionPaint.setColor(CONNECTION_COLOR);
connectionPaint.setColor(
isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
connectionPaint.setStrokeWidth(CONNECTION_THICKNESS);
NormalizedLandmark start = handLandmarkList.get(c.start());
NormalizedLandmark end = handLandmarkList.get(c.end());
@ -91,11 +105,23 @@ public class HandsResultImageView extends AppCompatImageView {
connectionPaint);
}
Paint landmarkPaint = new Paint();
landmarkPaint.setColor(LANDMARK_COLOR);
// Draw landmarks.
landmarkPaint.setColor(isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
// Draws landmarks.
for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
canvas.drawCircle(
landmark.getX() * width, landmark.getY() * height, LANDMARK_RADIUS, landmarkPaint);
}
// Draws hollow circles around landmarks.
landmarkPaint.setColor(
isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
landmarkPaint.setStrokeWidth(HOLLOW_CIRCLE_WIDTH);
landmarkPaint.setStyle(Paint.Style.STROKE);
for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
canvas.drawCircle(
landmark.getX() * width,
landmark.getY() * height,
LANDMARK_RADIUS + HOLLOW_CIRCLE_WIDTH,
landmarkPaint);
}
}
}

View File

@ -28,6 +28,7 @@ import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import androidx.exifinterface.media.ExifInterface;
// ContentResolver dependency
import com.google.mediapipe.formats.proto.LandmarkProto.Landmark;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.solutioncore.CameraInput;
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
@ -177,9 +178,9 @@ public class MainActivity extends AppCompatActivity {
setupStaticImageModePipeline();
}
// Reads images from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
imageGetter.launch(gallery);
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
imageGetter.launch(pickImageIntent);
});
imageView = new HandsResultImageView(this);
}
@ -193,7 +194,7 @@ public class MainActivity extends AppCompatActivity {
this,
HandsOptions.builder()
.setStaticImageMode(true)
.setMaxNumHands(1)
.setMaxNumHands(2)
.setRunOnGpu(RUN_ON_GPU)
.build());
@ -241,9 +242,9 @@ public class MainActivity extends AppCompatActivity {
stopCurrentPipeline();
setupStreamingModePipeline(InputSource.VIDEO);
// Reads video from gallery.
Intent gallery =
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
videoGetter.launch(gallery);
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
videoGetter.launch(pickVideoIntent);
});
}
@ -269,7 +270,7 @@ public class MainActivity extends AppCompatActivity {
this,
HandsOptions.builder()
.setStaticImageMode(false)
.setMaxNumHands(1)
.setMaxNumHands(2)
.setRunOnGpu(RUN_ON_GPU)
.build());
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
@ -336,7 +337,11 @@ public class MainActivity extends AppCompatActivity {
}
private void logWristLandmark(HandsResult result, boolean showPixelValues) {
NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
if (result.multiHandLandmarks().isEmpty()) {
return;
}
NormalizedLandmark wristLandmark =
result.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
if (showPixelValues) {
int width = result.inputBitmap().getWidth();
@ -353,5 +358,16 @@ public class MainActivity extends AppCompatActivity {
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
wristLandmark.getX(), wristLandmark.getY()));
}
if (result.multiHandWorldLandmarks().isEmpty()) {
return;
}
Landmark wristWorldLandmark =
result.multiHandWorldLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Log.i(
TAG,
String.format(
"MediaPipe Hand wrist world coordinates (in meters with the origin at the hand's"
+ " approximate geometric center): x=%f m, y=%f m, z=%f m",
wristWorldLandmark.getX(), wristWorldLandmark.getY(), wristWorldLandmark.getZ()));
}
}

View File

@ -37,7 +37,7 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu.binarypb",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@ -37,9 +37,11 @@ android_binary(
srcs = glob(["*.java"]),
assets = [
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
],
assets_dir = "",
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@ -53,6 +55,7 @@ android_binary(
"outputVideoStreamName": "output_video",
"flipFramesVertically": "True",
"converterNumBuffers": "2",
# "modelComplexity": "0" # 0=lite, 1=heavy, not specified=heavy
},
multidex = "native",
deps = [

View File

@ -14,6 +14,9 @@
package com.google.mediapipe.apps.handtrackinggpu;
import android.content.pm.ApplicationInfo;
import android.content.pm.PackageManager;
import android.content.pm.PackageManager.NameNotFoundException;
import android.os.Bundle;
import android.util.Log;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
@ -30,6 +33,7 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
private static final String TAG = "MainActivity";
private static final String INPUT_NUM_HANDS_SIDE_PACKET_NAME = "num_hands";
private static final String INPUT_MODEL_COMPLEXITY = "model_complexity";
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks";
// Max number of hands to detect/process.
private static final int NUM_HANDS = 2;
@ -38,9 +42,22 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
ApplicationInfo applicationInfo;
try {
applicationInfo =
getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
} catch (NameNotFoundException e) {
throw new AssertionError(e);
}
AndroidPacketCreator packetCreator = processor.getPacketCreator();
Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put(INPUT_NUM_HANDS_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_HANDS));
if (applicationInfo.metaData.containsKey("modelComplexity")) {
inputSidePackets.put(
INPUT_MODEL_COMPLEXITY,
packetCreator.createInt32(applicationInfo.metaData.getInt("modelComplexity")));
}
processor.setInputSidePackets(inputSidePackets);
// To show verbose logging, run:

View File

@ -282,8 +282,12 @@ absl::Status KinematicPathSolver::UpdatePixelsPerDegree(
absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
const int max_location) {
RET_CHECK(initialized_)
<< "UpdateMinMaxLocation called before first observation added.";
if (!initialized_) {
max_location_ = max_location;
min_location_ = min_location;
return absl::OkStatus();
}
double prior_distance = max_location_ - min_location_;
double updated_distance = max_location - min_location;
double scale_change = updated_distance / prior_distance;

View File

@ -435,6 +435,23 @@ TEST(KinematicPathSolverTest, PassBorderTest) {
EXPECT_FLOAT_EQ(state, 404.56668);
}
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfUninitialized) {
KinematicOptions options;
options.set_min_motion_to_reframe(2.0);
options.set_max_velocity(1000);
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
}
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfInitialized) {
KinematicOptions options;
options.set_min_motion_to_reframe(2.0);
options.set_max_velocity(1000);
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
}
} // namespace
} // namespace autoflip
} // namespace mediapipe

View File

@ -55,7 +55,7 @@ objc_library(
name = "HandDetectionGpuAppLibrary",
data = [
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu_binary_graph",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
],
deps = [
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@ -64,7 +64,7 @@ objc_library(
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
],
deps = [
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@ -0,0 +1,40 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
// Joint of a 3D human model (e.g. elbow, knee, wrist). Contains 3D rotation of
// the joint and its visibility.
message Joint {
// Joint rotation in 6D continuous representation.
// Such a representation is more suitable for NN model training and can be
// converted to quaternions and Euler angles if needed. Details can be found
// in https://arxiv.org/abs/1812.07035.
repeated float rotation_6d = 1;
// Joint visibility.
// Float score of whether the joint is visible: present on the screen and not
// occluded by other objects. Depending on the model, the visibility value is
// either a sigmoid or an argument of a sigmoid, but in either case a higher
// value indicates a higher probability of the joint being visible. Should stay
// unset if not supported.
optional float visibility = 2;
}
// Group of Joint protos.
message JointList {
repeated Joint joint = 1;
}
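For reference, the rotation matrix is recovered from the 6D encoding by Gram-Schmidt orthogonalization of the two encoded 3-vectors, as described in the paper linked above. A minimal, illustrative conversion sketch (not part of MediaPipe; the column convention is an assumption):
```cpp
#include <array>
#include <cmath>

using Vec3 = std::array<float, 3>;

static Vec3 Normalize(const Vec3& v) {
  const float n = std::sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
  return {v[0] / n, v[1] / n, v[2] / n};
}

static float Dot(const Vec3& a, const Vec3& b) {
  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}

static Vec3 Cross(const Vec3& a, const Vec3& b) {
  return {a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2],
          a[0] * b[1] - a[1] * b[0]};
}

// Maps rotation_6d = (a1, a2) to an orthonormal basis (b1, b2, b3), the
// assumed columns of the rotation matrix (https://arxiv.org/abs/1812.07035).
std::array<Vec3, 3> RotationMatrixFrom6D(const std::array<float, 6>& r) {
  const Vec3 a1 = {r[0], r[1], r[2]};
  const Vec3 a2 = {r[3], r[4], r[5]};
  const Vec3 b1 = Normalize(a1);
  const float d = Dot(b1, a2);
  const Vec3 b2 =
      Normalize({a2[0] - d * b1[0], a2[1] - d * b1[1], a2[2] - d * b1[2]});
  const Vec3 b3 = Cross(b1, b2);
  return {b1, b2, b3};
}
```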

View File

@ -109,8 +109,7 @@ class Image {
return gpu_buffer_.GetCVPixelBufferRef();
}
#else
const mediapipe::GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr()
const {
mediapipe::GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
if (use_gpu_ == false) ConvertToGpu();
return gpu_buffer_.GetGlTextureBufferSharedPtr();
}

View File

@ -22,9 +22,8 @@
// For consistency, we now set MEDIAPIPE_MOBILE there too. However, for the sake
// of projects that may want to build MediaPipe using alternative build systems,
// we also try to set platform-specific defines in this header if missing.
#if !defined(MEDIAPIPE_MOBILE) && \
(defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) || \
defined(__EMSCRIPTEN__))
#if !defined(MEDIAPIPE_MOBILE) && \
(defined(__ANDROID__) || defined(__EMSCRIPTEN__))
#define MEDIAPIPE_MOBILE
#endif
@ -36,6 +35,11 @@
#include "TargetConditionals.h" // for TARGET_OS_*
#if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX
#define MEDIAPIPE_IOS
#if !defined(MEDIAPIPE_MOBILE) && !TARGET_OS_OSX
#define MEDIAPIPE_MOBILE
#endif
#endif
#if !defined(MEDIAPIPE_OSX) && TARGET_OS_OSX
#define MEDIAPIPE_OSX

View File

@ -65,9 +65,9 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
OptionsSyntaxUtil syntax_util;
for (auto& node : *config->mutable_node()) {
FieldData node_data = options_field_util::AsFieldData(node);
for (const std::string& option_def : node.option_value()) {
FieldData node_data = options_field_util::AsFieldData(node);
std::vector<absl::string_view> tag_and_name =
syntax_util.StrSplitTags(option_def);
std::string graph_tag = syntax_util.OptionFieldsTag(tag_and_name[1]);
@ -96,6 +96,7 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
status.Update(MergeField(node_path, packet_data, &node_options));
options_field_util::SetOptionsMessage(node_options, &node);
}
node.clear_option_value();
}
return status;
}

View File

@ -137,7 +137,6 @@ TEST_F(OptionsUtilTest, CopyLiteralOptions) {
NightLightCalculatorOptions expected_node_options;
expected_node_options.add_num_lights(8);
expected_node.add_node_options()->PackFrom(expected_node_options);
*expected_node.add_option_value() = "num_lights:options/chain_length";
EXPECT_THAT(actual_node, EqualsProto(expected_node));
MP_EXPECT_OK(graph.StartRun({}));

View File

@ -656,7 +656,6 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
chain_length: 3
}
}
option_value: "chain_length:options/chain_length"
}
type: "MoonSubgraph"
graph_options {
@ -666,5 +665,84 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
EXPECT_THAT(moon_subgraph, mediapipe::EqualsProto(expected_graph));
}
// Shows ExpandSubgraphs applied twice. "option_value" fields are evaluated
// and removed on the first ExpandSubgraphs call. If "option_value" fields
// are not removed during ExpandSubgraphs, they evaluate incorrectly on the
// second ExpandSubgraphs call and this test fails on "expected_node_options".
TEST(SubgraphExpansionTest, SimpleSubgraphOptionsTwice) {
GraphRegistry graph_registry;
// Register a simple-subgraph that accepts graph options.
auto moon_subgraph =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
type: "MoonSubgraph"
graph_options: {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
node: {
calculator: "MoonCalculator"
node_options: {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
option_value: "chain_length:options/chain_length"
}
)pb");
graph_registry.Register("MoonSubgraph", moon_subgraph);
// Invoke the simple-subgraph with graph options.
// The empty NodeChainSubgraphOptions below allows "option_value" fields
// on "MoonCalculator" to evaluate incorrectly, if not removed.
auto sky_graph = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
graph_options: {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
node: {
calculator: "MoonSubgraph"
options: {
[mediapipe.NodeChainSubgraphOptions.ext] {
node_type: "DoubleIntCalculator"
chain_length: 3
}
}
}
)pb");
// The first ExpandSubgraphs call evaluates and removes "option_value" fields.
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
auto expanded_1 = sky_graph;
// The second ExpandSubgraphs call has no effect on the expanded graph.
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
// Validate the expected node_options for the "MoonSubgraph".
// If the "option_value" fields are not removed during ExpandSubgraphs,
// this test fails with an incorrect value for "chain_length".
auto expected_node_options =
mediapipe::ParseTextProtoOrDie<mediapipe::NodeChainSubgraphOptions>(
"chain_length: 3");
mediapipe::NodeChainSubgraphOptions node_options;
sky_graph.node(0).node_options(0).UnpackTo(&node_options);
ASSERT_THAT(node_options, mediapipe::EqualsProto(expected_node_options));
// Validate the results from both ExpandSubgraphs() calls.
CalculatorGraphConfig expected_graph =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
graph_options {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
node {
name: "moonsubgraph__MoonCalculator"
calculator: "MoonCalculator"
node_options {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {
chain_length: 3
}
}
}
)pb");
EXPECT_THAT(expanded_1, mediapipe::EqualsProto(expected_graph));
EXPECT_THAT(sky_graph, mediapipe::EqualsProto(expected_graph));
}
} // namespace
} // namespace mediapipe

View File

@ -112,13 +112,13 @@ GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const GpuBuffer& gpu_buffer, int plane) {
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(plane));
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(plane));
}
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const ImageFrame& image_frame) {
auto gpu_buffer = GpuBuffer::CopyingImageFrame(image_frame);
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(0));
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(0));
}
template <>
@ -149,7 +149,7 @@ GlTexture GlCalculatorHelperImpl::CreateDestinationTexture(
GpuBuffer gpu_buffer =
gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format);
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureWriteView(0));
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetWriteView<GlTextureView>(0));
}
} // namespace mediapipe

View File

@ -224,7 +224,8 @@ void GlTextureBuffer::WaitForConsumersOnGpu() {
// precisely, on only one GL context.
}
GlTextureView GlTextureBuffer::GetGlTextureReadView(
GlTextureView GlTextureBuffer::GetReadView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
auto gl_context = GlContext::GetCurrent();
CHECK(gl_context);
@ -241,7 +242,8 @@ GlTextureView GlTextureBuffer::GetGlTextureReadView(
nullptr);
}
GlTextureView GlTextureBuffer::GetGlTextureWriteView(
GlTextureView GlTextureBuffer::GetWriteView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
auto gl_context = GlContext::GetCurrent();
CHECK(gl_context);
@ -341,7 +343,8 @@ std::unique_ptr<ImageFrame> GlTextureBuffer::AsImageFrame() const {
ImageFormat::Format image_format = ImageFormatForGpuBufferFormat(format());
auto output = absl::make_unique<ImageFrame>(
image_format, width(), height(), ImageFrame::kGlDefaultAlignmentBoundary);
auto view = GetGlTextureReadView(nullptr, 0);
auto view =
GetReadView(mediapipe::internal::types<GlTextureView>{}, nullptr, 0);
ReadTexture(view, format(), output->MutablePixelData(),
output->PixelDataSize());
return output;

View File

@ -32,7 +32,9 @@ namespace mediapipe {
class GlCalculatorHelperImpl;
// Implements a GPU memory buffer as an OpenGL texture. For internal use.
class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
class GlTextureBuffer
: public mediapipe::internal::GpuBufferStorageImpl<
GlTextureBuffer, mediapipe::internal::ViewProvider<GlTextureView>> {
public:
// This is called when the texture buffer is deleted. It is passed a sync
// token created at that time on the GlContext. If the GlTextureBuffer has
@ -86,11 +88,12 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
int height() const { return height_; }
GpuBufferFormat format() const { return format_; }
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const override;
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override;
void ViewDoneWriting(const GlTextureView& view) override;
GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const override;
GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override;
std::unique_ptr<ImageFrame> AsImageFrame() const override;
// If this texture is going to be used outside of the context that produced
@ -142,6 +145,8 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
// Returns true on success.
bool CreateInternal(const void* data, int alignment = 4);
void ViewDoneWriting(const GlTextureView& view);
friend class GlCalculatorHelperImpl;
GLuint name_ = 0;

View File

@ -15,6 +15,7 @@
#ifndef MEDIAPIPE_GPU_GPU_BUFFER_H_
#define MEDIAPIPE_GPU_GPU_BUFFER_H_
#include <memory>
#include <utility>
#include "mediapipe/framework/formats/image_frame.h"
@ -23,6 +24,10 @@
#include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/gpu_buffer_storage.h"
// Note: these headers are needed for the legacy storage APIs. Do not add more
// storage-specific headers here. See WebGpuTextureBuffer/View for an example
// of adding a new storage and view.
#if defined(__APPLE__)
#include <CoreVideo/CoreVideo.h>
@ -31,9 +36,7 @@
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#include "mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.h"
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#else
#include "mediapipe/gpu/gl_texture_buffer.h"
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
@ -60,19 +63,28 @@ class GpuBuffer {
// are not portable. Applications and calculators should normally obtain
// GpuBuffers in a portable way from the framework, e.g. using
// GpuBufferMultiPool.
explicit GpuBuffer(
std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage)
: storage_(std::move(storage)) {}
// Note: these constructors and accessors for specific storage types exist
// for backwards compatibility reasons. Do not add new ones.
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
explicit GpuBuffer(CFHolder<CVPixelBufferRef> pixel_buffer)
: pixel_buffer_(std::move(pixel_buffer)) {}
: storage_(std::make_shared<GpuBufferStorageCvPixelBuffer>(
std::move(pixel_buffer))) {}
explicit GpuBuffer(CVPixelBufferRef pixel_buffer)
: pixel_buffer_(pixel_buffer) {}
: storage_(
std::make_shared<GpuBufferStorageCvPixelBuffer>(pixel_buffer)) {}
CVPixelBufferRef GetCVPixelBufferRef() const { return *pixel_buffer_; }
CVPixelBufferRef GetCVPixelBufferRef() const {
auto p = storage_->down_cast<GpuBufferStorageCvPixelBuffer>();
if (p) return **p;
return nullptr;
}
#else
explicit GpuBuffer(GlTextureBufferSharedPtr texture_buffer)
: texture_buffer_(std::move(texture_buffer)) {}
const GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr() const {
return texture_buffer_;
GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
return internal_storage<GlTextureBuffer>();
}
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
@ -93,14 +105,26 @@ class GpuBuffer {
// Allow assignment from nullptr.
GpuBuffer& operator=(std::nullptr_t other);
GlTextureView GetGlTextureReadView(int plane) const {
return current_storage().GetGlTextureReadView(
std::make_shared<GpuBuffer>(*this), plane);
// Gets a read view of the specified type. The arguments depend on the
// specific view type; see the corresponding ViewProvider.
template <class View, class... Args>
auto GetReadView(Args... args) const {
return current_storage()
.down_cast<mediapipe::internal::ViewProvider<View>>()
->GetReadView(mediapipe::internal::types<View>{},
std::make_shared<GpuBuffer>(*this),
std::forward<Args>(args)...);
}
GlTextureView GetGlTextureWriteView(int plane) {
return current_storage().GetGlTextureWriteView(
std::make_shared<GpuBuffer>(*this), plane);
// Gets a write view of the specified type. The arguments depend on the
// specific view type; see the corresponding ViewProvider.
template <class View, class... Args>
auto GetWriteView(Args... args) {
return current_storage()
.down_cast<mediapipe::internal::ViewProvider<View>>()
->GetWriteView(mediapipe::internal::types<View>{},
std::make_shared<GpuBuffer>(*this),
std::forward<Args>(args)...);
}
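For concreteness, a typical call site (mirroring the GlCalculatorHelper changes in this commit) looks roughly like the sketch below; `input_buffer` and `output_buffer` stand for existing GpuBuffer instances backed by a GL-capable storage, and the calls are assumed to run on a thread with a current GL context.
```cpp
// Read the first plane of an input buffer as a GL texture view.
GlTextureView src = input_buffer.GetReadView<GlTextureView>(/*plane=*/0);

// Obtain a writable GL texture view on a destination buffer.
GlTextureView dst = output_buffer.GetWriteView<GlTextureView>(/*plane=*/0);
```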
// Make a GpuBuffer copying the data from an ImageFrame.
@ -115,77 +139,57 @@ class GpuBuffer {
return current_storage().AsImageFrame();
}
// Attempts to access an underlying storage object of the specified type.
// This method is meant for internal use: user code should access the contents
// using views.
template <class T>
std::shared_ptr<T> internal_storage() const {
if (storage_->down_cast<T>()) return std::static_pointer_cast<T>(storage_);
return nullptr;
}
private:
class PlaceholderGpuBufferStorage
: public mediapipe::internal::GpuBufferStorage {
: public mediapipe::internal::GpuBufferStorageImpl<
PlaceholderGpuBufferStorage> {
public:
int width() const override { return 0; }
int height() const override { return 0; }
virtual GpuBufferFormat format() const override {
return GpuBufferFormat::kUnknown;
}
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const override {
return {};
}
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override {
return {};
}
void ViewDoneWriting(const GlTextureView& view) override{};
std::unique_ptr<ImageFrame> AsImageFrame() const override {
return nullptr;
}
};
mediapipe::internal::GpuBufferStorage& no_storage() const {
static PlaceholderGpuBufferStorage placeholder;
std::shared_ptr<mediapipe::internal::GpuBufferStorage>& no_storage() const {
static auto placeholder =
std::static_pointer_cast<mediapipe::internal::GpuBufferStorage>(
std::make_shared<PlaceholderGpuBufferStorage>());
return placeholder;
}
const mediapipe::internal::GpuBufferStorage& current_storage() const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
if (pixel_buffer_ != nullptr) return pixel_buffer_;
#else
if (texture_buffer_) return *texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return no_storage();
return *storage_;
}
mediapipe::internal::GpuBufferStorage& current_storage() {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
if (pixel_buffer_ != nullptr) return pixel_buffer_;
#else
if (texture_buffer_) return *texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return no_storage();
}
mediapipe::internal::GpuBufferStorage& current_storage() { return *storage_; }
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
GpuBufferStorageCvPixelBuffer pixel_buffer_;
#else
GlTextureBufferSharedPtr texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage_ =
no_storage();
};
inline bool GpuBuffer::operator==(std::nullptr_t other) const {
return &current_storage() == &no_storage();
return storage_ == no_storage();
}
inline bool GpuBuffer::operator==(const GpuBuffer& other) const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return pixel_buffer_ == other.pixel_buffer_;
#else
return texture_buffer_ == other.texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return storage_ == other.storage_;
}
inline GpuBuffer& GpuBuffer::operator=(std::nullptr_t other) {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
pixel_buffer_.reset(other);
#else
texture_buffer_ = other;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
storage_ = no_storage();
return *this;
}

View File

@ -12,27 +12,73 @@ class GpuBuffer;
namespace mediapipe {
namespace internal {
using mediapipe::GlTextureView;
using mediapipe::GpuBuffer;
using mediapipe::GpuBufferFormat;
template <class... T>
struct types {};
class GlTextureViewManager {
template <class V>
class ViewProvider;
// Note: this specialization temporarily lives here for backwards compatibility
// reasons. New specializations should be put in the same file as their view.
template <>
class ViewProvider<GlTextureView> {
public:
virtual ~GlTextureViewManager() = default;
virtual GlTextureView GetGlTextureReadView(
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const = 0;
virtual GlTextureView GetGlTextureWriteView(
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) = 0;
virtual void ViewDoneWriting(const GlTextureView& view) = 0;
virtual ~ViewProvider() = default;
// Note that the view type is encoded in an argument to allow overloading,
// so a storage class can implement GetRead/WriteView for multiple view types.
// We cannot use a template function because it cannot be virtual; we want to
// have a virtual function here to enforce that different storages supporting
// the same view implement the same signature.
// Note that we allow different views to have custom signatures, providing
// additional view-specific arguments that may be needed.
virtual GlTextureView GetReadView(types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const = 0;
virtual GlTextureView GetWriteView(types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) = 0;
};
class GpuBufferStorage : public GlTextureViewManager {
class GpuBufferStorage {
public:
virtual ~GpuBufferStorage() = default;
virtual int width() const = 0;
virtual int height() const = 0;
virtual GpuBufferFormat format() const = 0;
virtual std::unique_ptr<ImageFrame> AsImageFrame() const = 0;
// We can't use dynamic_cast since we want to support building without RTTI.
// The public methods delegate to the type-erased private virtual method.
template <class T>
T* down_cast() {
return static_cast<T*>(
const_cast<void*>(down_cast(tool::GetTypeHash<T>())));
}
template <class T>
const T* down_cast() const {
return static_cast<const T*>(down_cast(tool::GetTypeHash<T>()));
}
private:
virtual const void* down_cast(size_t type_hash) const = 0;
virtual size_t storage_type_hash() const = 0;
};
template <class T, class... U>
class GpuBufferStorageImpl : public GpuBufferStorage, public U... {
private:
virtual const void* down_cast(size_t type_hash) const override {
return down_cast_impl(type_hash, types<T, U...>{});
}
size_t storage_type_hash() const override { return tool::GetTypeHash<T>(); }
const void* down_cast_impl(size_t type_hash, types<>) const {
return nullptr;
}
template <class V, class... W>
const void* down_cast_impl(size_t type_hash, types<V, W...>) const {
if (type_hash == tool::GetTypeHash<V>()) return static_cast<const V*>(this);
return down_cast_impl(type_hash, types<W...>{});
}
};
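As a quick illustration of the RTTI-free cast, accessors such as `GetCVPixelBufferRef()` in gpu_buffer.h use it roughly as in the hedged sketch below, where `storage` stands for the buffer's `std::shared_ptr<GpuBufferStorage>` member.
```cpp
// down_cast<T>() returns nullptr when the concrete storage is not T, letting
// callers branch on the storage type without dynamic_cast/RTTI.
if (auto* cv_storage = storage->down_cast<GpuBufferStorageCvPixelBuffer>()) {
  CVPixelBufferRef pixel_buffer = **cv_storage;  // CFHolder dereference.
  // ... use the CVPixelBuffer-backed storage ...
} else if (auto* gl_storage = storage->down_cast<GlTextureBuffer>()) {
  // ... use the GL-texture-backed storage ...
}
```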
} // namespace internal

View File

@ -11,7 +11,8 @@ typedef CVOpenGLTextureRef CVTextureType;
typedef CVOpenGLESTextureRef CVTextureType;
#endif // TARGET_OS_OSX
GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
GlTextureView GpuBufferStorageCvPixelBuffer::GetReadView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
CVReturn err;
auto gl_context = GlContext::GetCurrent();
@ -58,11 +59,13 @@ GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
#endif // TARGET_OS_OSX
}
GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureWriteView(
GlTextureView GpuBufferStorageCvPixelBuffer::GetWriteView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
// For this storage there is currently no difference between read and write
// views, so we delegate to the read method.
return GetGlTextureReadView(std::move(gpu_buffer), plane);
return GetReadView(mediapipe::internal::types<GlTextureView>{},
std::move(gpu_buffer), plane);
}
void GpuBufferStorageCvPixelBuffer::ViewDoneWriting(const GlTextureView& view) {

View File

@ -12,7 +12,9 @@ namespace mediapipe {
class GlContext;
class GpuBufferStorageCvPixelBuffer
: public mediapipe::internal::GpuBufferStorage,
: public mediapipe::internal::GpuBufferStorageImpl<
GpuBufferStorageCvPixelBuffer,
mediapipe::internal::ViewProvider<GlTextureView>>,
public CFHolder<CVPixelBufferRef> {
public:
using CFHolder<CVPixelBufferRef>::CFHolder;
@ -28,12 +30,16 @@ class GpuBufferStorageCvPixelBuffer
return GpuBufferFormatForCVPixelFormat(
CVPixelBufferGetPixelFormatType(**this));
}
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const override;
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override;
GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const override;
GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override;
std::unique_ptr<ImageFrame> AsImageFrame() const override;
void ViewDoneWriting(const GlTextureView& view) override;
private:
void ViewDoneWriting(const GlTextureView& view);
};
} // namespace mediapipe

View File

@ -8,6 +8,9 @@ input_stream: "input_video"
# Max number of hands to detect/process. (int)
input_side_packet: "num_hands"
# Model complexity (0 or 1). (int)
input_side_packet: "model_complexity"
# GPU image. (GpuBuffer)
output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of
@ -39,6 +42,7 @@ node {
node {
calculator: "HandLandmarkTrackingGpu"
input_stream: "IMAGE:throttled_input_video"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "HANDEDNESS:handedness"

View File

@ -370,6 +370,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
jmethodID processMethod = env->GetMethodID(
callback_cls, process_method_name.c_str(), "(Ljava/util/List;)V");
// TODO: move to register natives.
jclass list_cls = env->FindClass("java/util/ArrayList");
jobject java_list =
env->NewObject(list_cls, env->GetMethodID(list_cls, "<init>", "()V"));
@ -392,6 +393,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
RemovePacket(packet_handle);
}
env->DeleteLocalRef(callback_cls);
env->DeleteLocalRef(list_cls);
env->DeleteLocalRef(java_list);
VLOG(2) << "Returned from java callback.";
}

View File

@ -56,8 +56,11 @@ JNIEXPORT jobjectArray JNICALL GRAPH_PROFILER_METHOD(
return nullptr;
}
// TODO: move to register natives.
jclass byte_array_cls = env->FindClass("[B");
jobjectArray profiles =
env->NewObjectArray(num_profiles, env->FindClass("[B"), nullptr);
env->NewObjectArray(num_profiles, byte_array_cls, nullptr);
env->DeleteLocalRef(byte_array_cls);
for (int i = 0; i < num_profiles; i++) {
const auto& profile = profiles_vec[i];
int size = profile.ByteSize();

View File

@ -143,8 +143,10 @@ jthrowable CreateMediaPipeException(JNIEnv* env, absl::Status status) {
env->SetByteArrayRegion(message_bytes, 0, length,
reinterpret_cast<jbyte*>(const_cast<char*>(
std::string(status.message()).c_str())));
return reinterpret_cast<jthrowable>(
jthrowable result = reinterpret_cast<jthrowable>(
env->NewObject(status_cls, status_ctr, status.code(), message_bytes));
env->DeleteLocalRef(status_cls);
return result;
}
bool ThrowIfError(JNIEnv* env, absl::Status status) {
@ -165,11 +167,11 @@ SerializedMessageIds::SerializedMessageIds(JNIEnv* env, jobject data) {
class_registry.GetFieldName(serialized_message, "typeName");
std::string value_obfuscated =
class_registry.GetFieldName(serialized_message, "value");
jclass j_class = reinterpret_cast<jclass>(
env->NewGlobalRef(env->FindClass(serialized_message_obfuscated.c_str())));
jclass j_class = env->FindClass(serialized_message_obfuscated.c_str());
type_name_id = env->GetFieldID(j_class, type_name_obfuscated.c_str(),
"Ljava/lang/String;");
value_id = env->GetFieldID(j_class, value_obfuscated.c_str(), "[B");
env->DeleteLocalRef(j_class);
}
} // namespace android

View File

@ -184,8 +184,11 @@ JNIEXPORT jobjectArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoVector)(
}
const std::vector<const ::mediapipe::proto_ns::MessageLite*>& proto_vector =
get_proto_vector.value();
// TODO: move to register natives.
jclass byte_array_cls = env->FindClass("[B");
jobjectArray proto_array =
env->NewObjectArray(proto_vector.size(), env->FindClass("[B"), nullptr);
env->NewObjectArray(proto_vector.size(), byte_array_cls, nullptr);
env->DeleteLocalRef(byte_array_cls);
for (int i = 0; i < proto_vector.size(); ++i) {
const ::mediapipe::proto_ns::MessageLite* proto_message = proto_vector[i];

View File

@ -137,6 +137,7 @@ void RegisterGraphNatives(JNIEnv *env) {
AddJNINativeMethod(&graph_methods, graph, "nativeGetProfiler", "(J)J",
(void *)&GRAPH_METHOD(nativeGetProfiler));
RegisterNativesVector(env, graph_class, graph_methods);
env->DeleteLocalRef(graph_class);
}
void RegisterGraphProfilerNatives(JNIEnv *env) {
@ -151,6 +152,7 @@ void RegisterGraphProfilerNatives(JNIEnv *env) {
&graph_profiler_methods, graph_profiler, "nativeGetCalculatorProfiles",
"(J)[[B", (void *)&GRAPH_PROFILER_METHOD(nativeGetCalculatorProfiles));
RegisterNativesVector(env, graph_profiler_class, graph_profiler_methods);
env->DeleteLocalRef(graph_profiler_class);
}
void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
@ -171,6 +173,7 @@ void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
(void *)&ANDROID_ASSET_UTIL_METHOD(nativeInitializeAssetManager));
RegisterNativesVector(env, android_asset_util_class,
android_asset_util_methods);
env->DeleteLocalRef(android_asset_util_class);
#endif
}
@ -191,6 +194,7 @@ void RegisterAndroidPacketCreatorNatives(JNIEnv *env) {
(void *)&ANDROID_PACKET_CREATOR_METHOD(nativeCreateRgbImageFrame));
RegisterNativesVector(env, android_packet_creator_class,
android_packet_creator_methods);
env->DeleteLocalRef(android_packet_creator_class);
#endif
}
@ -232,6 +236,7 @@ void RegisterPacketCreatorNatives(JNIEnv *env) {
"(JL" + serialized_message_name + ";)J",
(void *)&PACKET_CREATOR_METHOD(nativeCreateProto));
RegisterNativesVector(env, packet_creator_class, packet_creator_methods);
env->DeleteLocalRef(packet_creator_class);
}
void RegisterPacketGetterNatives(JNIEnv *env) {
@ -260,6 +265,7 @@ void RegisterPacketGetterNatives(JNIEnv *env) {
"nativeGetFloat32Vector", "(J)[F",
(void *)&PACKET_GETTER_METHOD(nativeGetFloat32Vector));
RegisterNativesVector(env, packet_getter_class, packet_getter_methods);
env->DeleteLocalRef(packet_getter_class);
}
void RegisterPacketNatives(JNIEnv *env) {
@ -278,6 +284,7 @@ void RegisterPacketNatives(JNIEnv *env) {
AddJNINativeMethod(&packet_methods, packet, "nativeIsEmpty", "(J)Z",
(void *)&PACKET_METHOD(nativeIsEmpty));
RegisterNativesVector(env, packet_class, packet_methods);
env->DeleteLocalRef(packet_class);
}
void RegisterCompatNatives(JNIEnv *env) {
@ -293,6 +300,7 @@ void RegisterCompatNatives(JNIEnv *env) {
"(I)J",
(void *)&COMPAT_METHOD(getCurrentNativeEGLSurface));
RegisterNativesVector(env, compat_class, compat_methods);
env->DeleteLocalRef(compat_class);
}
} // namespace

View File

@ -95,13 +95,12 @@ public class ImageSolutionResult implements SolutionResult {
}
}
// Releases image packet and the underlying data.
void releaseImagePackets() {
imagePacket.release();
// Clears the underlying image packets so that callers cannot access packets that become invalid
// once the output callback method returns.
void clearImagePackets() {
imagePacket = null;
if (imageResultPackets != null) {
for (Packet p : imageResultPackets) {
p.release();
}
imageResultPackets.clear();
}
}
}

View File

@ -90,12 +90,9 @@ public class OutputHandler<T extends SolutionResult> {
Log.e(TAG, "Error occurs when getting MediaPipe solution result. " + e);
}
} finally {
for (Packet packet : packets) {
packet.release();
}
if (solutionResult instanceof ImageSolutionResult) {
ImageSolutionResult imageSolutionResult = (ImageSolutionResult) solutionResult;
imageSolutionResult.releaseImagePackets();
imageSolutionResult.clearImagePackets();
}
}
}
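
The two changes above mean the image packets backing a result are cleared as soon as the result listener returns, so anything needed from them has to be read (or copied) inside the callback. The fragment below is a minimal sketch of that pattern, not part of this commit; it assumes a `Hands` instance named `hands` and a logging `TAG`.

```java
// Sketch: read image data inside the result callback; the underlying image
// packets are cleared by OutputHandler once this listener returns, so the
// result object must not be cached for later packet access.
hands.setResultListener(
    handsResult -> {
      if (handsResult.multiHandLandmarks().isEmpty()) {
        return;
      }
      int width = handsResult.inputBitmap().getWidth();
      int height = handsResult.inputBitmap().getHeight();
      Log.i(TAG, String.format("Input frame size: %d x %d", width, height));
    });
```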

View File

@ -34,7 +34,6 @@ android_library(
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework/formats:detection_java_proto_lite",
"//mediapipe/framework/formats:location_data_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_base",
"//third_party:autovalue",

View File

@ -17,7 +17,6 @@ package com.google.mediapipe.solutions.facedetection;
import android.content.Context;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutioncore.ErrorListener;
@ -104,27 +103,4 @@ public class FaceDetection extends ImageSolutionBase {
this.outputHandler.setErrorListener(listener);
this.errorListener = listener;
}
/**
* Gets a specific face keypoint by face index and face keypoint type.
*
* @param result the returned {@link FaceDetectionResult} object.
* @param faceIndex the face index. A smaller index maps to a detected face with a higher
* confidence score.
* @param faceKeypointType the face keypoint type defined in {@link FaceKeypoint}.
*/
public static RelativeKeypoint getFaceKeypoint(
FaceDetectionResult result,
int faceIndex,
@FaceKeypoint.FaceKeypointType int faceKeypointType) {
if (result == null
|| faceIndex >= result.multiFaceDetections().size()
|| faceKeypointType >= FaceKeypoint.NUM_KEY_POINTS) {
return RelativeKeypoint.getDefaultInstance();
}
Detection detection = result.multiFaceDetections().get(faceIndex);
float x = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getX();
float y = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getY();
return RelativeKeypoint.newBuilder().setX(x).setY(y).build();
}
}

View File

@ -23,12 +23,13 @@ android_library(
"HandsResult.java",
],
assets = [
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/palm_detection:palm_detection.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
],
assets_dir = "",
javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],

View File

@ -18,9 +18,10 @@ import android.content.Context;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList;
import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutioncore.ErrorListener;
@ -28,7 +29,9 @@ import com.google.mediapipe.solutioncore.ImageSolutionBase;
import com.google.mediapipe.solutioncore.OutputHandler;
import com.google.mediapipe.solutioncore.ResultListener;
import com.google.mediapipe.solutioncore.SolutionInfo;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
@ -85,10 +88,15 @@ public class Hands extends ImageSolutionBase {
private static final String CPU_GRAPH_NAME = "hand_landmark_tracking_cpu_image.binarypb";
private static final String IMAGE_INPUT_STREAM = "image";
private static final ImmutableList<String> OUTPUT_STREAMS =
ImmutableList.of("multi_hand_landmarks", "multi_handedness", "throttled_image");
ImmutableList.of(
"multi_hand_landmarks",
"multi_hand_world_landmarks",
"multi_handedness",
"throttled_image");
private static final int LANDMARKS_INDEX = 0;
private static final int HANDEDNESS_INDEX = 1;
private static final int INPUT_IMAGE_INDEX = 2;
private static final int WORLD_LANDMARKS_INDEX = 1;
private static final int HANDEDNESS_INDEX = 2;
private static final int INPUT_IMAGE_INDEX = 3;
private final OutputHandler<HandsResult> outputHandler;
/**
@ -109,8 +117,18 @@ public class Hands extends ImageSolutionBase {
reportError("Error occurs while getting MediaPipe hand landmarks.", e);
}
try {
handsResultBuilder.setMultiHandedness(
getProtoVector(packets.get(HANDEDNESS_INDEX), Classification.parser()));
handsResultBuilder.setMultiHandWorldLandmarks(
getProtoVector(packets.get(WORLD_LANDMARKS_INDEX), LandmarkList.parser()));
} catch (MediaPipeException e) {
reportError("Error occurs while getting MediaPipe hand world landmarks.", e);
}
try {
List<Classification> handednessList = new ArrayList<>();
for (ClassificationList protolist :
getProtoVector(packets.get(HANDEDNESS_INDEX), ClassificationList.parser())) {
handednessList.add(protolist.getClassification(0));
}
handsResultBuilder.setMultiHandedness(handednessList);
} catch (MediaPipeException e) {
reportError("Error occurs while getting MediaPipe handedness data.", e);
}
@ -155,21 +173,4 @@ public class Hands extends ImageSolutionBase {
this.outputHandler.setErrorListener(listener);
this.errorListener = listener;
}
/**
* Gets a specific hand landmark by hand index and hand landmark type.
*
* @param result the returned {@link HandsResult} object.
* @param handIndex the hand index. The hand landmark lists are sorted by the confidence score.
* @param landmarkType the hand landmark type defined in {@link HandLandmark}.
*/
public static NormalizedLandmark getHandLandmark(
HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
if (result == null
|| handIndex >= result.multiHandLandmarks().size()
|| landmarkType >= HandLandmark.NUM_LANDMARKS) {
return NormalizedLandmark.getDefaultInstance();
}
return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
}
}
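
With the static `getHandLandmark` helper removed above, callers now index into `multiHandLandmarks()` themselves. A minimal application-side equivalent of the removed helper, keeping the same bounds checks, might look like the sketch below (the method name is illustrative, not part of this commit).

```java
// Sketch: guarded lookup of a single landmark, mirroring the removed
// Hands.getHandLandmark() helper.
static NormalizedLandmark getHandLandmarkOrDefault(
    HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
  if (result == null
      || handIndex >= result.multiHandLandmarks().size()
      || landmarkType >= HandLandmark.NUM_LANDMARKS) {
    return NormalizedLandmark.getDefaultInstance();
  }
  return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
}
```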

View File

@ -17,6 +17,7 @@ package com.google.mediapipe.solutions.hands;
import android.graphics.Bitmap;
import com.google.auto.value.AutoBuilder;
import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.framework.Packet;
@ -31,14 +32,17 @@ import java.util.List;
*/
public class HandsResult extends ImageSolutionResult {
private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks;
private final ImmutableList<LandmarkList> multiHandWorldLandmarks;
private final ImmutableList<Classification> multiHandedness;
HandsResult(
ImmutableList<NormalizedLandmarkList> multiHandLandmarks,
ImmutableList<LandmarkList> multiHandWorldLandmarks,
ImmutableList<Classification> multiHandedness,
Packet imagePacket,
long timestamp) {
this.multiHandLandmarks = multiHandLandmarks;
this.multiHandWorldLandmarks = multiHandWorldLandmarks;
this.multiHandedness = multiHandedness;
this.timestamp = timestamp;
this.imagePacket = imagePacket;
@ -53,6 +57,12 @@ public class HandsResult extends ImageSolutionResult {
return multiHandLandmarks;
}
// Collection of detected/tracked hands' landmarks in real-world 3D coordinates that are in meters
// with the origin at the hand's approximate geometric center.
public ImmutableList<LandmarkList> multiHandWorldLandmarks() {
return multiHandWorldLandmarks;
}
// Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each
// hand is composed of label and score. label is a string of value either "Left" or "Right". score
// is the estimated probability of the predicted handedness and is always greater than or equal to
@ -70,6 +80,8 @@ public class HandsResult extends ImageSolutionResult {
public abstract static class Builder {
abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value);
abstract Builder setMultiHandWorldLandmarks(List<LandmarkList> value);
abstract Builder setMultiHandedness(List<Classification> value);
abstract Builder setTimestamp(long value);
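
The new `multiHandWorldLandmarks()` getter above exposes per-hand `LandmarkList` protos with metric coordinates (origin at the hand's approximate geometric center), next to the per-hand handedness. A minimal sketch of reading both, assuming a `HandsResult` named `result` that contains at least one hand; the accessors are the standard protobuf-Java getters.

```java
// Sketch: world landmarks are in meters; handedness carries a "Left"/"Right"
// label and a probability score.
Landmark wristWorld =
    result.multiHandWorldLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
Classification handedness = result.multiHandedness().get(0);
Log.i(
    TAG,
    String.format(
        "Wrist world coordinates (in meters): x=%f, y=%f, z=%f; handedness=%s score=%f",
        wristWorld.getX(),
        wristWorld.getY(),
        wristWorld.getZ(),
        handedness.getLabel(),
        handedness.getScore()));
```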

View File

@ -24,7 +24,6 @@ package(default_visibility = ["//visibility:public"])
exports_files([
"hand_landmark_full.tflite",
"hand_landmark_lite.tflite",
"hand_landmark_sparse.tflite",
"handedness.txt",
])
@ -56,6 +55,7 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/calculators/util:world_landmark_projection_calculator",
],
)
@ -75,6 +75,7 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/calculators/util:world_landmark_projection_calculator",
],
)

View File

@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"
@ -77,11 +87,13 @@ node {
output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor"
output_stream: "handedness_tensor"
output_stream: "world_landmark_tensor"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
}
}
}
@ -175,3 +187,33 @@ node {
input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks"
}
# Drops world landmarks tensors if hand is not present.
node {
calculator: "GateCalculator"
input_stream: "world_landmark_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_world_landmark_tensor"
}
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_world_landmark_tensor"
output_stream: "LANDMARKS:unprojected_world_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
}
}
}
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "WorldLandmarkProjectionCalculator"
input_stream: "LANDMARKS:unprojected_world_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_world_landmarks"
}

View File

@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"
@ -71,11 +81,13 @@ node {
output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor"
output_stream: "handedness_tensor"
output_stream: "world_landmark_tensor"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
}
}
}
@ -169,3 +181,33 @@ node {
input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks"
}
# Drops world landmarks tensors if hand is not present.
node {
calculator: "GateCalculator"
input_stream: "world_landmark_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_world_landmark_tensor"
}
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_world_landmark_tensor"
output_stream: "LANDMARKS:unprojected_world_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
}
}
}
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "WorldLandmarkProjectionCalculator"
input_stream: "LANDMARKS:unprojected_world_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_world_landmarks"
}

View File

@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a Classification proto.
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
@ -89,6 +103,7 @@ node {
# Detects palms.
node {
calculator: "PalmDetectionCpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:palm_detection_image"
output_stream: "DETECTIONS:all_palm_detections"
}
@ -186,12 +201,13 @@ node {
input_stream: "IMAGE:image_for_landmarks"
input_stream: "ROI:single_hand_rect"
output_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
output_stream: "HANDEDNESS:single_handedness"
}
# Collects the handedness for each single hand into a vector. Upon
# receiving the BATCH_END timestamp, outputs a vector of classification at the
# BATCH_END timestamp.
# Collects the handedness for each single hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
# timestamp.
node {
calculator: "EndLoopClassificationListCalculator"
input_stream: "ITEM:single_handedness"
@ -218,6 +234,16 @@ node {
output_stream: "ITERABLE:multi_hand_landmarks"
}
# Collects a set of world landmarks for each hand into a vector. Upon receiving
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_world_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_world_landmarks"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
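
The handedness note above assumes a mirrored (selfie-style) input image. For frames from a rear-facing camera, one common workaround is to flip the bitmap horizontally before handing it to the solution so the "Left"/"Right" labels stay meaningful. A minimal sketch using standard Android APIs; `frame` is a hypothetical input `Bitmap`, and sending the mirrored bitmap to the solution is omitted.

```java
// Sketch: mirror a rear-camera frame horizontally so that it matches the
// graph's assumption of a horizontally flipped (selfie) input.
Matrix mirror = new Matrix();
mirror.preScale(-1.0f, 1.0f);
Bitmap mirrored =
    Bitmap.createBitmap(
        frame, 0, 0, frame.getWidth(), frame.getHeight(), mirror, /* filter= */ true);
// Pass `mirrored` (not `frame`) to the solution as its input image.
```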

View File

@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
@ -25,6 +25,7 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
@ -32,8 +33,22 @@ output_stream: "IMAGE:throttled_image"
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a Classification proto.
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
@ -93,6 +108,7 @@ node {
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
output_stream: "LANDMARKS:multi_hand_landmarks"
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
output_stream: "HANDEDNESS:multi_handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"

View File

@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a Classification proto.
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
@ -89,6 +103,7 @@ node {
# Detects palms.
node {
calculator: "PalmDetectionGpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:palm_detection_image"
output_stream: "DETECTIONS:all_palm_detections"
}
@ -187,12 +202,13 @@ node {
input_stream: "IMAGE:image_for_landmarks"
input_stream: "ROI:single_hand_rect"
output_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
output_stream: "HANDEDNESS:single_handedness"
}
# Collects the handedness for each single hand into a vector. Upon
# receiving the BATCH_END timestamp, outputs a vector of classification at the
# BATCH_END timestamp.
# Collects the handedness for each single hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
# timestamp.
node {
calculator: "EndLoopClassificationListCalculator"
input_stream: "ITEM:single_handedness"
@ -219,6 +235,16 @@ node {
output_stream: "ITERABLE:multi_hand_landmarks"
}
# Collects a set of world landmarks for each hand into a vector. Upon receiving
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_world_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_world_landmarks"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.

View File

@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a Classification proto.
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
@ -93,6 +107,7 @@ node {
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
output_stream: "LANDMARKS:multi_hand_landmarks"
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
output_stream: "HANDEDNESS:multi_handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"

View File

@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
# Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"
@ -72,5 +77,6 @@ node {
calculator: "FaceLandmarkCpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:face_tracking_roi"
input_side_packet: "WITH_ATTENTION:refine_landmarks"
output_stream: "LANDMARKS:face_landmarks"
}

View File

@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
# Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"
@ -72,5 +77,6 @@ node {
calculator: "FaceLandmarkGpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:face_tracking_roi"
input_side_packet: "WITH_ATTENTION:refine_landmarks"
output_stream: "LANDMARKS:face_landmarks"
}

View File

@ -35,6 +35,7 @@
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# output_stream: "POSE_LANDMARKS:pose_landmarks"
# output_stream: "FACE_LANDMARKS:face_landmarks"
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
@ -135,5 +141,6 @@ node {
calculator: "FaceLandmarksFromPoseCpu"
input_stream: "IMAGE:image"
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
output_stream: "FACE_LANDMARKS:face_landmarks"
}

View File

@ -35,6 +35,7 @@
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# output_stream: "POSE_LANDMARKS:pose_landmarks"
# output_stream: "FACE_LANDMARKS:face_landmarks"
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
@ -135,5 +141,6 @@ node {
calculator: "FaceLandmarksFromPoseGpu"
input_stream: "IMAGE:image"
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
output_stream: "FACE_LANDMARKS:face_landmarks"
}

View File

@ -21,13 +21,29 @@ licenses(["notice"])
package(default_visibility = ["//visibility:public"])
exports_files(["palm_detection.tflite"])
exports_files([
"palm_detection_lite.tflite",
"palm_detection_full.tflite",
])
mediapipe_simple_subgraph(
name = "palm_detection_model_loader",
graph = "palm_detection_model_loader.pbtxt",
register_as = "PalmDetectionModelLoader",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/tflite:tflite_model_calculator",
"//mediapipe/calculators/util:local_file_contents_calculator",
"//mediapipe/framework/tool:switch_container",
],
)
mediapipe_simple_subgraph(
name = "palm_detection_cpu",
graph = "palm_detection_cpu.pbtxt",
register_as = "PalmDetectionCpu",
deps = [
":palm_detection_model_loader",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
@ -43,6 +59,7 @@ mediapipe_simple_subgraph(
graph = "palm_detection_gpu.pbtxt",
register_as = "PalmDetectionGpu",
deps = [
":palm_detection_model_loader",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
@ -52,10 +69,3 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
exports_files(
srcs = [
"palm_detection.tflite",
"palm_detection_labelmap.txt",
],
)

View File

@ -5,6 +5,11 @@ type: "PalmDetectionCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Detected palms. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of palms detected. However, the MediaPipe
@ -21,11 +26,11 @@ node {
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
min: 0.0
max: 1.0
}
border_mode: BORDER_ZERO
@ -39,6 +44,13 @@ node {
output_side_packet: "opresolver"
}
# Loads the palm detection TF Lite model.
node {
calculator: "PalmDetectionModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
@ -47,9 +59,9 @@ node {
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
input_side_packet: "MODEL:model"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
delegate { xnnpack {} }
}
}
@ -65,8 +77,8 @@ node {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
input_size_width: 192
input_size_height: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
@ -90,7 +102,7 @@ node {
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_boxes: 2016
num_coords: 18
box_coord_offset: 0
keypoint_coord_offset: 4
@ -100,10 +112,10 @@ node {
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
x_scale: 192.0
y_scale: 192.0
w_scale: 192.0
h_scale: 192.0
min_score_thresh: 0.5
}
}

Binary file not shown.

View File

@ -5,6 +5,11 @@ type: "PalmDetectionGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Detected palms. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of palms detected. However, the MediaPipe
@ -21,11 +26,11 @@ node {
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
min: 0.0
max: 1.0
}
border_mode: BORDER_ZERO
@ -45,6 +50,13 @@ node {
}
}
# Loads the palm detection TF Lite model.
node {
calculator: "PalmDetectionModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
@ -53,10 +65,10 @@ node {
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
input_side_packet: "MODEL:model"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
use_gpu: true
delegate { gpu {} }
}
}
}
@ -71,8 +83,8 @@ node {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
input_size_width: 192
input_size_height: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
@ -96,7 +108,7 @@ node {
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_boxes: 2016
num_coords: 18
box_coord_offset: 0
keypoint_coord_offset: 4
@ -106,10 +118,10 @@ node {
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
x_scale: 192.0
y_scale: 192.0
w_scale: 192.0
h_scale: 192.0
min_score_thresh: 0.5
}
}

Binary file not shown.

View File

@ -0,0 +1,63 @@
# MediaPipe graph to load a selected palm detection TF Lite model.
type: "PalmDetectionModelLoader"
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# TF Lite model represented as a FlatBuffer.
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
output_side_packet: "MODEL:model"
# Determines the path to the desired palm detection model file.
node {
calculator: "SwitchContainer"
input_side_packet: "SELECT:model_complexity"
output_side_packet: "PACKET:model_path"
options: {
[mediapipe.SwitchContainerOptions.ext] {
select: 1
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/palm_detection/palm_detection_lite.tflite"
}
}
}
}
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/palm_detection/palm_detection_full.tflite"
}
}
}
}
}
}
}
# Loads the file in the specified path into a blob.
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:model_path"
output_side_packet: "CONTENTS:model_blob"
options: {
[mediapipe.LocalFileContentsCalculatorOptions.ext]: {
text_mode: false
}
}
}
# Converts the input blob into a TF Lite model.
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:model_blob"
output_side_packet: "MODEL:model"
}

View File

@ -127,6 +127,7 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*
- (void)videoUpdateIfNeeded {
CMTime timestamp = [_videoItem currentTime];
if ([_videoOutput hasNewPixelBufferForItemTime:timestamp]) {
CVPixelBufferRef pixelBuffer =
[_videoOutput copyPixelBufferForItemTime:timestamp itemTimeForDisplay:nil];
@ -139,6 +140,12 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*
}
CFRelease(pixelBuffer);
});
} else if (!_videoDisplayLink.paused && _videoPlayer.rate == 0) {
// The video might be paused by the operating system for reasons other than an interruption,
// which are therefore not caught by the interruption handling. When that happens,
// @c _videoDisplayLink is not in a paused state while @c _videoPlayer has a rate of 0
// (i.e. it is paused). In this scenario we restart the video playback.
[_videoPlayer play];
}
}

View File

@ -124,7 +124,10 @@ class Hands(SolutionBase):
'handlandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence,
},
outputs=['multi_hand_landmarks', 'multi_handedness'])
outputs=[
'multi_hand_landmarks', 'multi_hand_world_landmarks',
'multi_handedness'
])
def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the hand landmarks and handedness of each detected hand.
@ -137,10 +140,14 @@ class Hands(SolutionBase):
ValueError: If the input image is not three channel RGB.
Returns:
A NamedTuple object with two fields: a "multi_hand_landmarks" field that
contains the hand landmarks on each detected hand and a "multi_handedness"
field that contains the handedness (left v.s. right hand) of the detected
hand.
A NamedTuple object with the following fields:
1) a "multi_hand_landmarks" field that contains the hand landmarks on
each detected hand.
2) a "multi_hand_world_landmarks" field that contains the hand landmarks
on each detected hand in real-world 3D coordinates that are in meters
with the origin at the hand's approximate geometric center.
3) a "multi_handedness" field that contains the handedness (left v.s.
right hand) of the detected hand.
"""
return super().process(input_data={'image': image})

View File

@ -34,20 +34,20 @@ from mediapipe.python.solutions import hands as mp_hands
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
LITE_MODEL_DIFF_THRESHOLD = 25 # pixels
FULL_MODEL_DIFF_THRESHOLD = 20 # pixels
EXPECTED_HAND_COORDINATES_PREDICTION = [[[138, 343], [211, 330], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]],
[[580, 34], [504, 50], [459, 94],
EXPECTED_HAND_COORDINATES_PREDICTION = [[[580, 34], [504, 50], [459, 94],
[429, 146], [397, 182], [507, 167],
[479, 245], [469, 292], [464, 330],
[545, 180], [534, 265], [533, 319],
[536, 360], [581, 172], [587, 252],
[593, 304], [599, 346], [615, 168],
[628, 223], [638, 258], [648, 288]]]
[628, 223], [638, 258], [648, 288]],
[[138, 343], [211, 330], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]]]
class HandsTest(parameterized.TestCase):

View File

@ -80,6 +80,7 @@ class Holistic(SolutionBase):
smooth_landmarks=True,
enable_segmentation=False,
smooth_segmentation=True,
refine_face_landmarks=False,
min_detection_confidence=0.5,
min_tracking_confidence=0.5):
"""Initializes a MediaPipe Holistic object.
@ -98,6 +99,10 @@ class Holistic(SolutionBase):
smooth_segmentation: Whether to filter segmentation across different input
images to reduce jitter. See details in
https://solutions.mediapipe.dev/holistic#smooth_segmentation.
refine_face_landmarks: Whether to further refine the landmark coordinates
around the eyes and lips, and output additional landmarks around the
irises. Defaults to False. See details in
https://solutions.mediapipe.dev/holistic#refine_face_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in
https://solutions.mediapipe.dev/holistic#min_detection_confidence.
@ -114,6 +119,7 @@ class Holistic(SolutionBase):
'enable_segmentation': enable_segmentation,
'smooth_segmentation':
smooth_segmentation and not static_image_mode,
'refine_face_landmarks': refine_face_landmarks,
'use_prev_landmarks': not static_image_mode,
},
calculator_params={

View File

@ -99,18 +99,23 @@ class PoseTest(parameterized.TestCase):
results = holistic.process(image)
self.assertIsNone(results.pose_landmarks)
@parameterized.named_parameters(('static_lite', True, 0, 3),
('static_full', True, 1, 3),
('static_heavy', True, 2, 3),
('video_lite', False, 0, 3),
('video_full', False, 1, 3),
('video_heavy', False, 2, 3))
def test_on_image(self, static_image_mode, model_complexity, num_frames):
@parameterized.named_parameters(('static_lite', True, 0, False, 3),
('static_full', True, 1, False, 3),
('static_heavy', True, 2, False, 3),
('video_lite', False, 0, False, 3),
('video_full', False, 1, False, 3),
('video_heavy', False, 2, False, 3),
('static_full_refine_face', True, 1, True, 3),
('video_full_refine_face', False, 1, True, 3))
def test_on_image(self, static_image_mode, model_complexity,
refine_face_landmarks, num_frames):
image_path = os.path.join(os.path.dirname(__file__),
'testdata/holistic.jpg')
image = cv2.imread(image_path)
with mp_holistic.Holistic(static_image_mode=static_image_mode,
model_complexity=model_complexity) as holistic:
with mp_holistic.Holistic(
static_image_mode=static_image_mode,
model_complexity=model_complexity,
refine_face_landmarks=refine_face_landmarks) as holistic:
for idx in range(num_frames):
results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx)
@ -129,7 +134,8 @@ class PoseTest(parameterized.TestCase):
EXPECTED_RIGHT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD)
# TODO: Verify the correctness of the face landmarks.
self.assertLen(results.face_landmarks.landmark, 468)
self.assertLen(results.face_landmarks.landmark,
478 if refine_face_landmarks else 468)
if __name__ == '__main__':