Project import generated by Copybara.

GitOrigin-RevId: d4a11282d20fe4d2e137f9032cf349750030dcb9
MediaPipe Team 2021-11-03 14:21:54 -07:00 committed by jqtang
parent 1faeaae7e5
commit d4bb35fe5a
72 changed files with 1089 additions and 336 deletions

View File

@@ -257,8 +257,15 @@ glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
 glSurfaceView.setRenderInputImage(true);
 faceDetection.setResultListener(
     faceDetectionResult -> {
+      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
+        return;
+      }
       RelativeKeypoint noseTip =
-          FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
+          faceDetectionResult
+              .multiFaceDetections()
+              .get(0)
+              .getLocationData()
+              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
       Log.i(
           TAG,
           String.format(
@@ -297,10 +304,17 @@ FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
 FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
 faceDetection.setResultListener(
     faceDetectionResult -> {
+      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
+        return;
+      }
       int width = faceDetectionResult.inputBitmap().getWidth();
       int height = faceDetectionResult.inputBitmap().getHeight();
       RelativeKeypoint noseTip =
-          FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
+          faceDetectionResult
+              .multiFaceDetections()
+              .get(0)
+              .getLocationData()
+              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
       Log.i(
           TAG,
           String.format(
@@ -334,9 +348,9 @@ ActivityResultLauncher<Intent> imageGetter =
         }
       }
     });
-Intent gallery = new Intent(
-    Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-imageGetter.launch(gallery);
+Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
+pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
+imageGetter.launch(pickImageIntent);
 ```

 #### Video Input
@@ -368,8 +382,15 @@ glSurfaceView.setRenderInputImage(true);
 faceDetection.setResultListener(
     faceDetectionResult -> {
+      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
+        return;
+      }
       RelativeKeypoint noseTip =
-          FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
+          faceDetectionResult
+              .multiFaceDetections()
+              .get(0)
+              .getLocationData()
+              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
       Log.i(
           TAG,
           String.format(
@@ -398,9 +419,9 @@ ActivityResultLauncher<Intent> videoGetter =
         }
       }
     });
-Intent gallery =
-    new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
-videoGetter.launch(gallery);
+Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
+pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
+videoGetter.launch(pickVideoIntent);
 ```

 ## Example Apps

View File

@@ -612,9 +612,9 @@ ActivityResultLauncher<Intent> imageGetter =
         }
       }
     });
-Intent gallery = new Intent(
-    Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-imageGetter.launch(gallery);
+Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
+pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
+imageGetter.launch(pickImageIntent);
 ```

 #### Video Input
@@ -678,9 +678,9 @@ ActivityResultLauncher<Intent> videoGetter =
         }
       }
     });
-Intent gallery =
-    new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
-videoGetter.launch(gallery);
+Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
+pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
+videoGetter.launch(pickVideoIntent);
 ```

 ## Example Apps

View File

@@ -91,8 +91,10 @@ To detect initial hand locations, we designed a
 mobile real-time uses in a manner similar to the face detection model in
 [MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
 task: our
-[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite)
-has to work across a variety of hand sizes with a large scale span (~20x)
+[lite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite)
+and
+[full model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite)
+have to work across a variety of hand sizes with a large scale span (~20x)
 relative to the image frame and be able to detect occluded and self-occluded
 hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
 region, the lack of such features in hands makes it comparatively difficult to
@@ -195,6 +197,17 @@ of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
 and the smaller the value the closer the landmark is to the camera. The
 magnitude of `z` uses roughly the same scale as `x`.

+#### multi_hand_world_landmarks
+
+Collection of detected/tracked hands, where each hand is represented as a list
+of 21 hand landmarks in world coordinates. Each landmark consists of the
+following:
+
+*   `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
+    hand's approximate geometric center.
+*   `visibility`: Identical to that defined in the corresponding
+    [multi_hand_landmarks](#multi_hand_landmarks).
+
 #### multi_handedness

 Collection of handedness of the detected/tracked hands (i.e. is it a left or
@@ -262,6 +275,12 @@ with mp_hands.Hands(
         mp_drawing_styles.get_default_hand_connections_style())
     cv2.imwrite(
         '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
+    # Draw hand world landmarks.
+    if not results.multi_hand_world_landmarks:
+      continue
+    for hand_world_landmarks in results.multi_hand_world_landmarks:
+      mp_drawing.plot_landmarks(
+        hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

 # For webcam input:
 cap = cv2.VideoCapture(0)
@@ -400,7 +419,7 @@ Supported configuration options:
 HandsOptions handsOptions =
     HandsOptions.builder()
         .setStaticImageMode(false)
-        .setMaxNumHands(1)
+        .setMaxNumHands(2)
         .setRunOnGpu(true).build();
 Hands hands = new Hands(this, handsOptions);
 hands.setErrorListener(
@@ -423,8 +442,11 @@ glSurfaceView.setRenderInputImage(true);
 hands.setResultListener(
     handsResult -> {
-      NormalizedLandmark wristLandmark = Hands.getHandLandmark(
-          handsResult, 0, HandLandmark.WRIST);
+      if (result.multiHandLandmarks().isEmpty()) {
+        return;
+      }
+      NormalizedLandmark wristLandmark =
+          handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
       Log.i(
           TAG,
           String.format(
@@ -453,7 +475,7 @@ glSurfaceView.post(
 HandsOptions handsOptions =
     HandsOptions.builder()
         .setStaticImageMode(true)
-        .setMaxNumHands(1)
+        .setMaxNumHands(2)
         .setRunOnGpu(true).build();
 Hands hands = new Hands(this, handsOptions);
@@ -464,10 +486,13 @@ Hands hands = new Hands(this, handsOptions);
 HandsResultImageView imageView = new HandsResultImageView(this);
 hands.setResultListener(
     handsResult -> {
+      if (result.multiHandLandmarks().isEmpty()) {
+        return;
+      }
       int width = handsResult.inputBitmap().getWidth();
       int height = handsResult.inputBitmap().getHeight();
-      NormalizedLandmark wristLandmark = Hands.getHandLandmark(
-          handsResult, 0, HandLandmark.WRIST);
+      NormalizedLandmark wristLandmark =
+          handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
       Log.i(
           TAG,
           String.format(
@@ -501,9 +526,9 @@ ActivityResultLauncher<Intent> imageGetter =
         }
       }
     });
-Intent gallery = new Intent(
-    Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-imageGetter.launch(gallery);
+Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
+pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
+imageGetter.launch(pickImageIntent);
 ```

 #### Video Input
@@ -513,7 +538,7 @@ imageGetter.launch(gallery);
 HandsOptions handsOptions =
     HandsOptions.builder()
         .setStaticImageMode(false)
-        .setMaxNumHands(1)
+        .setMaxNumHands(2)
         .setRunOnGpu(true).build();
 Hands hands = new Hands(this, handsOptions);
 hands.setErrorListener(
@@ -536,8 +561,11 @@ glSurfaceView.setRenderInputImage(true);
 hands.setResultListener(
     handsResult -> {
-      NormalizedLandmark wristLandmark = Hands.getHandLandmark(
-          handsResult, 0, HandLandmark.WRIST);
+      if (result.multiHandLandmarks().isEmpty()) {
+        return;
+      }
+      NormalizedLandmark wristLandmark =
+          handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
       Log.i(
           TAG,
           String.format(
@@ -566,9 +594,9 @@ ActivityResultLauncher<Intent> videoGetter =
         }
       }
     });
-Intent gallery =
-    new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
-videoGetter.launch(gallery);
+Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
+pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
+videoGetter.launch(pickVideoIntent);
 ```

 ## Example Apps
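The new `multi_hand_world_landmarks` output documented in this file can be read from the Python Solution API in the same way as `multi_hand_landmarks`. A minimal sketch, assuming MediaPipe 0.8.8+; the input path is illustrative only:

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

# Illustrative input path; replace with a real image containing a hand.
image = cv2.imread('/tmp/hand.jpg')
with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.multi_hand_world_landmarks:
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      wrist = hand_world_landmarks.landmark[mp_hands.HandLandmark.WRIST]
      # World coordinates are in meters, with the origin at the hand's
      # approximate geometric center.
      print(f'wrist: x={wrist.x:.3f} m, y={wrist.y:.3f} m, z={wrist.z:.3f} m')
```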

View File

@@ -159,6 +159,11 @@ images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
 is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
 `true`.

+#### refine_face_landmarks
+
+Whether to further refine the landmark coordinates around the eyes and lips, and
+output additional landmarks around the irises. Default to `false`.
+
 #### min_detection_confidence

 Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@@ -241,6 +246,7 @@ Supported configuration options:
 *   [smooth_landmarks](#smooth_landmarks)
 *   [enable_segmentation](#enable_segmentation)
 *   [smooth_segmentation](#smooth_segmentation)
+*   [refine_face_landmarks](#refine_face_landmarks)
 *   [min_detection_confidence](#min_detection_confidence)
 *   [min_tracking_confidence](#min_tracking_confidence)
@@ -256,7 +262,8 @@ IMAGE_FILES = []
 with mp_holistic.Holistic(
     static_image_mode=True,
     model_complexity=2,
-    enable_segmentation=True) as holistic:
+    enable_segmentation=True,
+    refine_face_landmarks=True) as holistic:
   for idx, file in enumerate(IMAGE_FILES):
     image = cv2.imread(file)
     image_height, image_width, _ = image.shape
@@ -350,6 +357,7 @@ Supported configuration options:
 *   [smoothLandmarks](#smooth_landmarks)
 *   [enableSegmentation](#enable_segmentation)
 *   [smoothSegmentation](#smooth_segmentation)
+*   [refineFaceLandmarks](#refineFaceLandmarks)
 *   [minDetectionConfidence](#min_detection_confidence)
 *   [minTrackingConfidence](#min_tracking_confidence)
@@ -421,6 +429,7 @@ holistic.setOptions({
   smoothLandmarks: true,
   enableSegmentation: true,
   smoothSegmentation: true,
+  refineFaceLandmarks: true,
   minDetectionConfidence: 0.5,
   minTrackingConfidence: 0.5
 });
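A minimal Python sketch of the `refine_face_landmarks` option added above. The input path is illustrative; with refinement enabled, the face landmark list is expected to also carry iris landmarks (478 points rather than 468 — treat the exact count as an assumption, not something this diff states):

```python
import cv2
import mediapipe as mp

mp_holistic = mp.solutions.holistic

image = cv2.imread('/tmp/person.jpg')  # Illustrative path.
with mp_holistic.Holistic(
    static_image_mode=True,
    refine_face_landmarks=True) as holistic:
  results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.face_landmarks:
    # With refine_face_landmarks=True the list also includes iris landmarks.
    print('face landmarks:', len(results.face_landmarks.landmark))
```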

View File

@@ -55,15 +55,14 @@ one over the other.
 ### [Hands](https://google.github.io/mediapipe/solutions/hands)

 *   Palm detection model:
-    [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
+    [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite),
+    [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite),
     [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
 *   Hand landmark model:
     [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
     [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
-    [TFLite model (sparse)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_sparse.tflite),
     [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
-*   [Model card](https://mediapipe.page.link/handmc),
-    [Model card (sparse)](https://mediapipe.page.link/handmc-sparse)
+*   [Model card](https://mediapipe.page.link/handmc)

 ### [Pose](https://google.github.io/mediapipe/solutions/pose)

View File

@@ -60,7 +60,10 @@ class PacketClonerCalculator : public CalculatorBase {
     const auto calculator_options =
         cc->Options<mediapipe::PacketClonerCalculatorOptions>();
     output_only_when_all_inputs_received_ =
-        calculator_options.output_only_when_all_inputs_received();
+        calculator_options.output_only_when_all_inputs_received() ||
+        calculator_options.output_packets_only_when_all_inputs_received();
+    output_empty_packets_before_all_inputs_received_ =
+        calculator_options.output_packets_only_when_all_inputs_received();

     // Parse input streams.
     tick_signal_index_ = cc->Inputs().NumEntries() - 1;
@@ -88,6 +91,9 @@ class PacketClonerCalculator : public CalculatorBase {
       // Return if one of the input is null.
       for (int i = 0; i < tick_signal_index_; ++i) {
        if (current_[i].IsEmpty()) {
+          if (output_empty_packets_before_all_inputs_received_) {
+            SetAllNextTimestampBounds(cc);
+          }
           return absl::OkStatus();
         }
       }
@@ -107,9 +113,17 @@ class PacketClonerCalculator : public CalculatorBase {
   }

 private:
+  void SetAllNextTimestampBounds(CalculatorContext* cc) {
+    for (int j = 0; j < tick_signal_index_; ++j) {
+      cc->Outputs().Index(j).SetNextTimestampBound(
+          cc->InputTimestamp().NextAllowedInStream());
+    }
+  }
+
   std::vector<Packet> current_;
   int tick_signal_index_;
   bool output_only_when_all_inputs_received_;
+  bool output_empty_packets_before_all_inputs_received_;
 };

 REGISTER_CALCULATOR(PacketClonerCalculator);

View File

@@ -28,4 +28,9 @@ message PacketClonerCalculatorOptions {
   // When true, this calculator will drop received TICK packets if any input
   // stream hasn't received a packet yet.
   optional bool output_only_when_all_inputs_received = 1 [default = false];
+
+  // Similar with above, but also transmit empty packet for all streams before
+  // all inputs are received.
+  optional bool output_packets_only_when_all_inputs_received = 2
+      [default = false];
 }

View File

@@ -32,9 +32,9 @@ public class FaceDetectionResultImageView extends AppCompatImageView {
   private static final String TAG = "FaceDetectionResultImageView";

   private static final int KEYPOINT_COLOR = Color.RED;
-  private static final int KEYPOINT_RADIUS = 15;
+  private static final int KEYPOINT_RADIUS = 8; // Pixels
   private static final int BBOX_COLOR = Color.GREEN;
-  private static final int BBOX_THICKNESS = 10;
+  private static final int BBOX_THICKNESS = 5; // Pixels
   private Bitmap latest;

   public FaceDetectionResultImageView(Context context) {

View File

@@ -28,7 +28,6 @@ import androidx.activity.result.ActivityResultLauncher;
 import androidx.activity.result.contract.ActivityResultContracts;
 import androidx.exifinterface.media.ExifInterface;
 // ContentResolver dependency
-import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
 import com.google.mediapipe.solutioncore.CameraInput;
 import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
 import com.google.mediapipe.solutioncore.VideoInput;
@@ -36,6 +35,7 @@ import com.google.mediapipe.solutions.facedetection.FaceDetection;
 import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;
 import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
 import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
+import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
 import java.io.IOException;
 import java.io.InputStream;
@@ -175,9 +175,9 @@ public class MainActivity extends AppCompatActivity {
             setupStaticImageModePipeline();
           }
           // Reads images from gallery.
-          Intent gallery =
-              new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-          imageGetter.launch(gallery);
+          Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
+          pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
+          imageGetter.launch(pickImageIntent);
         });
     imageView = new FaceDetectionResultImageView(this);
   }
@@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
           stopCurrentPipeline();
           setupStreamingModePipeline(InputSource.VIDEO);
           // Reads video from gallery.
-          Intent gallery =
-              new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
-          videoGetter.launch(gallery);
+          Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
+          pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
+          videoGetter.launch(pickVideoIntent);
         });
   }
@@ -334,8 +334,15 @@ public class MainActivity extends AppCompatActivity {
   private void logNoseTipKeypoint(
       FaceDetectionResult result, int faceIndex, boolean showPixelValues) {
+    if (result.multiFaceDetections().isEmpty()) {
+      return;
+    }
     RelativeKeypoint noseTip =
-        FaceDetection.getFaceKeypoint(result, faceIndex, FaceKeypoint.NOSE_TIP);
+        result
+            .multiFaceDetections()
+            .get(faceIndex)
+            .getLocationData()
+            .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
     // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
     if (showPixelValues) {
       int width = result.inputBitmap().getWidth();

View File

@@ -34,19 +34,19 @@ public class FaceMeshResultImageView extends AppCompatImageView {
   private static final String TAG = "FaceMeshResultImageView";

   private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
-  private static final int TESSELATION_THICKNESS = 5;
+  private static final int TESSELATION_THICKNESS = 3; // Pixels
   private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
-  private static final int RIGHT_EYE_THICKNESS = 8;
+  private static final int RIGHT_EYE_THICKNESS = 5; // Pixels
   private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
-  private static final int RIGHT_EYEBROW_THICKNESS = 8;
+  private static final int RIGHT_EYEBROW_THICKNESS = 5; // Pixels
   private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
-  private static final int LEFT_EYE_THICKNESS = 8;
+  private static final int LEFT_EYE_THICKNESS = 5; // Pixels
   private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
-  private static final int LEFT_EYEBROW_THICKNESS = 8;
+  private static final int LEFT_EYEBROW_THICKNESS = 5; // Pixels
   private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
-  private static final int FACE_OVAL_THICKNESS = 8;
+  private static final int FACE_OVAL_THICKNESS = 5; // Pixels
   private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
-  private static final int LIPS_THICKNESS = 8;
+  private static final int LIPS_THICKNESS = 5; // Pixels
   private Bitmap latest;

   public FaceMeshResultImageView(Context context) {

View File

@@ -176,9 +176,9 @@ public class MainActivity extends AppCompatActivity {
             setupStaticImageModePipeline();
           }
           // Reads images from gallery.
-          Intent gallery =
-              new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-          imageGetter.launch(gallery);
+          Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
+          pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
+          imageGetter.launch(pickImageIntent);
         });
     imageView = new FaceMeshResultImageView(this);
   }
@@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
           stopCurrentPipeline();
           setupStreamingModePipeline(InputSource.VIDEO);
           // Reads video from gallery.
-          Intent gallery =
-              new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
-          videoGetter.launch(gallery);
+          Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
+          pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
+          videoGetter.launch(pickVideoIntent);
         });
   }

View File

@@ -28,7 +28,16 @@ import java.util.List;
 public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
   private static final String TAG = "HandsResultGlRenderer";

-  private static final float CONNECTION_THICKNESS = 20.0f;
+  private static final float[] LEFT_HAND_CONNECTION_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
+  private static final float[] RIGHT_HAND_CONNECTION_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
+  private static final float CONNECTION_THICKNESS = 25.0f;
+  private static final float[] LEFT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
+  private static final float[] RIGHT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
+  private static final float HOLLOW_CIRCLE_RADIUS = 0.01f;
+  private static final float[] LEFT_HAND_LANDMARK_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
+  private static final float[] RIGHT_HAND_LANDMARK_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
+  private static final float LANDMARK_RADIUS = 0.008f;
+  private static final int NUM_SEGMENTS = 120;
   private static final String VERTEX_SHADER =
       "uniform mat4 uProjectionMatrix;\n"
           + "attribute vec4 vPosition;\n"
@@ -37,12 +46,14 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
           + "}";
   private static final String FRAGMENT_SHADER =
       "precision mediump float;\n"
+          + "uniform vec4 uColor;\n"
           + "void main() {\n"
-          + "  gl_FragColor = vec4(0, 1, 0, 1);\n"
+          + "  gl_FragColor = uColor;\n"
           + "}";
   private int program;
   private int positionHandle;
   private int projectionMatrixHandle;
+  private int colorHandle;

   private int loadShader(int type, String shaderCode) {
     int shader = GLES20.glCreateShader(type);
@@ -61,6 +72,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
     GLES20.glLinkProgram(program);
     positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
     projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
+    colorHandle = GLES20.glGetUniformLocation(program, "uColor");
   }

   @Override
@@ -74,7 +86,22 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
     int numHands = result.multiHandLandmarks().size();
     for (int i = 0; i < numHands; ++i) {
-      drawLandmarks(result.multiHandLandmarks().get(i).getLandmarkList());
+      boolean isLeftHand = result.multiHandedness().get(i).getLabel().equals("Left");
+      drawConnections(
+          result.multiHandLandmarks().get(i).getLandmarkList(),
+          isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
+      for (NormalizedLandmark landmark : result.multiHandLandmarks().get(i).getLandmarkList()) {
+        // Draws the landmark.
+        drawCircle(
+            landmark.getX(),
+            landmark.getY(),
+            isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
+        // Draws a hollow circle around the landmark.
+        drawHollowCircle(
+            landmark.getX(),
+            landmark.getY(),
+            isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
+      }
     }
   }
@@ -87,7 +114,8 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
     GLES20.glDeleteProgram(program);
   }

-  private void drawLandmarks(List<NormalizedLandmark> handLandmarkList) {
+  private void drawConnections(List<NormalizedLandmark> handLandmarkList, float[] colorArray) {
+    GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
     for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
       NormalizedLandmark start = handLandmarkList.get(c.start());
       NormalizedLandmark end = handLandmarkList.get(c.end());
@@ -103,4 +131,51 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
       GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
     }
   }
private void drawCircle(float x, float y, float[] colorArray) {
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
int vertexCount = NUM_SEGMENTS + 2;
float[] vertices = new float[vertexCount * 3];
vertices[0] = x;
vertices[1] = y;
vertices[2] = 0;
for (int i = 1; i < vertexCount; i++) {
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
int currentIndex = 3 * i;
vertices[currentIndex] = x + (float) (LANDMARK_RADIUS * Math.cos(angle));
vertices[currentIndex + 1] = y + (float) (LANDMARK_RADIUS * Math.sin(angle));
vertices[currentIndex + 2] = 0;
}
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertices.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(vertices);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_FAN, 0, vertexCount);
}
private void drawHollowCircle(float x, float y, float[] colorArray) {
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
int vertexCount = NUM_SEGMENTS + 1;
float[] vertices = new float[vertexCount * 3];
for (int i = 0; i < vertexCount; i++) {
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
int currentIndex = 3 * i;
vertices[currentIndex] = x + (float) (HOLLOW_CIRCLE_RADIUS * Math.cos(angle));
vertices[currentIndex + 1] = y + (float) (HOLLOW_CIRCLE_RADIUS * Math.sin(angle));
vertices[currentIndex + 2] = 0;
}
FloatBuffer vertexBuffer =
ByteBuffer.allocateDirect(vertices.length * 4)
.order(ByteOrder.nativeOrder())
.asFloatBuffer()
.put(vertices);
vertexBuffer.position(0);
GLES20.glEnableVertexAttribArray(positionHandle);
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
GLES20.glDrawArrays(GLES20.GL_LINE_STRIP, 0, vertexCount);
}
 }

View File

@@ -31,10 +31,15 @@ import java.util.List;
 public class HandsResultImageView extends AppCompatImageView {
   private static final String TAG = "HandsResultImageView";

-  private static final int LANDMARK_COLOR = Color.RED;
-  private static final int LANDMARK_RADIUS = 15;
-  private static final int CONNECTION_COLOR = Color.GREEN;
-  private static final int CONNECTION_THICKNESS = 10;
+  private static final int LEFT_HAND_CONNECTION_COLOR = Color.parseColor("#30FF30");
+  private static final int RIGHT_HAND_CONNECTION_COLOR = Color.parseColor("#FF3030");
+  private static final int CONNECTION_THICKNESS = 8; // Pixels
+  private static final int LEFT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#30FF30");
+  private static final int RIGHT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#FF3030");
+  private static final int HOLLOW_CIRCLE_WIDTH = 5; // Pixels
+  private static final int LEFT_HAND_LANDMARK_COLOR = Color.parseColor("#FF3030");
+  private static final int RIGHT_HAND_LANDMARK_COLOR = Color.parseColor("#30FF30");
+  private static final int LANDMARK_RADIUS = 10; // Pixels
   private Bitmap latest;

   public HandsResultImageView(Context context) {
@@ -62,7 +67,11 @@ public class HandsResultImageView extends AppCompatImageView {
     int numHands = result.multiHandLandmarks().size();
     for (int i = 0; i < numHands; ++i) {
       drawLandmarksOnCanvas(
-          result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
+          result.multiHandLandmarks().get(i).getLandmarkList(),
+          result.multiHandedness().get(i).getLabel().equals("Left"),
+          canvas,
+          width,
+          height);
     }
   }
@@ -75,11 +84,16 @@ public class HandsResultImageView extends AppCompatImageView {
   }

   private void drawLandmarksOnCanvas(
-      List<NormalizedLandmark> handLandmarkList, Canvas canvas, int width, int height) {
+      List<NormalizedLandmark> handLandmarkList,
+      boolean isLeftHand,
+      Canvas canvas,
+      int width,
+      int height) {
     // Draw connections.
     for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
       Paint connectionPaint = new Paint();
-      connectionPaint.setColor(CONNECTION_COLOR);
+      connectionPaint.setColor(
+          isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
       connectionPaint.setStrokeWidth(CONNECTION_THICKNESS);
       NormalizedLandmark start = handLandmarkList.get(c.start());
       NormalizedLandmark end = handLandmarkList.get(c.end());
@@ -91,11 +105,23 @@ public class HandsResultImageView extends AppCompatImageView {
           connectionPaint);
     }
     Paint landmarkPaint = new Paint();
-    landmarkPaint.setColor(LANDMARK_COLOR);
-    // Draw landmarks.
+    landmarkPaint.setColor(isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
+    // Draws landmarks.
     for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
       canvas.drawCircle(
           landmark.getX() * width, landmark.getY() * height, LANDMARK_RADIUS, landmarkPaint);
     }
+    // Draws hollow circles around landmarks.
+    landmarkPaint.setColor(
+        isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
+    landmarkPaint.setStrokeWidth(HOLLOW_CIRCLE_WIDTH);
+    landmarkPaint.setStyle(Paint.Style.STROKE);
+    for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
+      canvas.drawCircle(
+          landmark.getX() * width,
+          landmark.getY() * height,
+          LANDMARK_RADIUS + HOLLOW_CIRCLE_WIDTH,
+          landmarkPaint);
+    }
   }
 }

View File

@@ -28,6 +28,7 @@ import androidx.activity.result.ActivityResultLauncher;
 import androidx.activity.result.contract.ActivityResultContracts;
 import androidx.exifinterface.media.ExifInterface;
 // ContentResolver dependency
+import com.google.mediapipe.formats.proto.LandmarkProto.Landmark;
 import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
 import com.google.mediapipe.solutioncore.CameraInput;
 import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
@@ -177,9 +178,9 @@ public class MainActivity extends AppCompatActivity {
             setupStaticImageModePipeline();
           }
           // Reads images from gallery.
-          Intent gallery =
-              new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
-          imageGetter.launch(gallery);
+          Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
+          pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
+          imageGetter.launch(pickImageIntent);
         });
     imageView = new HandsResultImageView(this);
   }
@@ -193,7 +194,7 @@ public class MainActivity extends AppCompatActivity {
             this,
             HandsOptions.builder()
                 .setStaticImageMode(true)
-                .setMaxNumHands(1)
+                .setMaxNumHands(2)
                 .setRunOnGpu(RUN_ON_GPU)
                 .build());
@@ -241,9 +242,9 @@ public class MainActivity extends AppCompatActivity {
           stopCurrentPipeline();
           setupStreamingModePipeline(InputSource.VIDEO);
           // Reads video from gallery.
-          Intent gallery =
-              new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
-          videoGetter.launch(gallery);
+          Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
+          pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
+          videoGetter.launch(pickVideoIntent);
         });
   }
@@ -269,7 +270,7 @@ public class MainActivity extends AppCompatActivity {
             this,
             HandsOptions.builder()
                 .setStaticImageMode(false)
-                .setMaxNumHands(1)
+                .setMaxNumHands(2)
                 .setRunOnGpu(RUN_ON_GPU)
                 .build());
     hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
@@ -336,7 +337,11 @@ public class MainActivity extends AppCompatActivity {
   }

   private void logWristLandmark(HandsResult result, boolean showPixelValues) {
-    NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
+    if (result.multiHandLandmarks().isEmpty()) {
+      return;
+    }
+    NormalizedLandmark wristLandmark =
+        result.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
     // For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
     if (showPixelValues) {
       int width = result.inputBitmap().getWidth();
@@ -353,5 +358,16 @@ public class MainActivity extends AppCompatActivity {
               "MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
               wristLandmark.getX(), wristLandmark.getY()));
     }
+    if (result.multiHandWorldLandmarks().isEmpty()) {
+      return;
+    }
+    Landmark wristWorldLandmark =
+        result.multiHandWorldLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
+    Log.i(
+        TAG,
+        String.format(
+            "MediaPipe Hand wrist world coordinates (in meters with the origin at the hand's"
+                + " approximate geometric center): x=%f m, y=%f m, z=%f m",
+            wristWorldLandmark.getX(), wristWorldLandmark.getY(), wristWorldLandmark.getZ()));
   }
 }

View File

@@ -37,7 +37,7 @@ android_binary(
     srcs = glob(["*.java"]),
     assets = [
         "//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu.binarypb",
-        "//mediapipe/modules/palm_detection:palm_detection.tflite",
+        "//mediapipe/modules/palm_detection:palm_detection_full.tflite",
    ],
    assets_dir = "",
    manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",

View File

@@ -37,9 +37,11 @@ android_binary(
     srcs = glob(["*.java"]),
     assets = [
         "//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
-        "//mediapipe/modules/hand_landmark:handedness.txt",
         "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
-        "//mediapipe/modules/palm_detection:palm_detection.tflite",
+        "//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
+        "//mediapipe/modules/hand_landmark:handedness.txt",
+        "//mediapipe/modules/palm_detection:palm_detection_full.tflite",
+        "//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
     ],
     assets_dir = "",
     manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
@@ -53,6 +55,7 @@ android_binary(
         "outputVideoStreamName": "output_video",
         "flipFramesVertically": "True",
         "converterNumBuffers": "2",
+        # "modelComplexity": "0" # 0=lite, 1=heavy, not specified=heavy
     },
     multidex = "native",
     deps = [

View File

@@ -14,6 +14,9 @@

 package com.google.mediapipe.apps.handtrackinggpu;

+import android.content.pm.ApplicationInfo;
+import android.content.pm.PackageManager;
+import android.content.pm.PackageManager.NameNotFoundException;
 import android.os.Bundle;
 import android.util.Log;
 import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
@@ -30,6 +33,7 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
   private static final String TAG = "MainActivity";

   private static final String INPUT_NUM_HANDS_SIDE_PACKET_NAME = "num_hands";
+  private static final String INPUT_MODEL_COMPLEXITY = "model_complexity";
   private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks";
   // Max number of hands to detect/process.
   private static final int NUM_HANDS = 2;
@@ -38,9 +42,22 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
   protected void onCreate(Bundle savedInstanceState) {
     super.onCreate(savedInstanceState);

+    ApplicationInfo applicationInfo;
+    try {
+      applicationInfo =
+          getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
+    } catch (NameNotFoundException e) {
+      throw new AssertionError(e);
+    }
+
     AndroidPacketCreator packetCreator = processor.getPacketCreator();
     Map<String, Packet> inputSidePackets = new HashMap<>();
     inputSidePackets.put(INPUT_NUM_HANDS_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_HANDS));
+    if (applicationInfo.metaData.containsKey("modelComplexity")) {
+      inputSidePackets.put(
+          INPUT_MODEL_COMPLEXITY,
+          packetCreator.createInt32(applicationInfo.metaData.getInt("modelComplexity")));
+    }
     processor.setInputSidePackets(inputSidePackets);

     // To show verbose logging, run:

View File

@@ -282,8 +282,12 @@ absl::Status KinematicPathSolver::UpdatePixelsPerDegree(
 absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
                                                        const int max_location) {
-  RET_CHECK(initialized_)
-      << "UpdateMinMaxLocation called before first observation added.";
+  if (!initialized_) {
+    max_location_ = max_location;
+    min_location_ = min_location;
+    return absl::OkStatus();
+  }
   double prior_distance = max_location_ - min_location_;
   double updated_distance = max_location - min_location;
   double scale_change = updated_distance / prior_distance;

View File

@@ -435,6 +435,23 @@ TEST(KinematicPathSolverTest, PassBorderTest) {
   EXPECT_FLOAT_EQ(state, 404.56668);
 }
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfUninitialized) {
KinematicOptions options;
options.set_min_motion_to_reframe(2.0);
options.set_max_velocity(1000);
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
}
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfInitialized) {
KinematicOptions options;
options.set_min_motion_to_reframe(2.0);
options.set_max_velocity(1000);
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
}
 }  // namespace
 }  // namespace autoflip
 }  // namespace mediapipe

View File

@@ -55,7 +55,7 @@ objc_library(
     name = "HandDetectionGpuAppLibrary",
     data = [
         "//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu_binary_graph",
-        "//mediapipe/modules/palm_detection:palm_detection.tflite",
+        "//mediapipe/modules/palm_detection:palm_detection_full.tflite",
     ],
     deps = [
         "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@@ -64,7 +64,7 @@ objc_library(
         "//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
         "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
         "//mediapipe/modules/hand_landmark:handedness.txt",
-        "//mediapipe/modules/palm_detection:palm_detection.tflite",
+        "//mediapipe/modules/palm_detection:palm_detection_full.tflite",
     ],
     deps = [
         "//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",

View File

@@ -0,0 +1,40 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
// Joint of a 3D human model (e.g. elbow, knee, wrist). Contains 3D rotation of
// the joint and its visibility.
message Joint {
// Joint rotation in 6D contineous representation.
// Such representation is more sutable for NN model training and can be
// converted to quaternions and Euler angles if needed. Details can be found
// in https://arxiv.org/abs/1812.07035.
repeated float rotation_6d = 1;
// Joint visibility.
// Float score of whether joint is visible: present on the screen and not
// occluded by other objects. Depending on the model, visibility value is
// either a sigmoid or an argument of sigmoid, but in any case higher value
// indicates higher probability of joint being visible. Should stay unset if
// not supported.
optional float visibility = 2;
}
// Group of Joint protos.
message JointList {
repeated Joint joint = 1;
}
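The `rotation_6d` field above stores the continuous 6D rotation representation from https://arxiv.org/abs/1812.07035: two 3-vectors that can be re-orthonormalized into a full rotation matrix with a Gram-Schmidt step. A hedged NumPy sketch of that conversion (the column-vector convention is an assumption here, not something this proto specifies):

```python
import numpy as np

def rotation_6d_to_matrix(rotation_6d):
  """Converts a 6-value rotation (two 3D vectors) into a 3x3 rotation matrix."""
  a1 = np.asarray(rotation_6d[0:3], dtype=np.float64)
  a2 = np.asarray(rotation_6d[3:6], dtype=np.float64)
  b1 = a1 / np.linalg.norm(a1)            # First basis vector: normalize.
  a2 = a2 - np.dot(b1, a2) * b1           # Remove the component along b1.
  b2 = a2 / np.linalg.norm(a2)            # Second basis vector.
  b3 = np.cross(b1, b2)                   # Third vector completes a right-handed frame.
  # Assumed convention: the two triplets are the first two matrix columns.
  return np.stack([b1, b2, b3], axis=-1)

# Identity rotation encoded as 6D: the first two columns of the identity matrix.
print(rotation_6d_to_matrix([1, 0, 0, 0, 1, 0]))
```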

View File

@@ -109,8 +109,7 @@ class Image {
     return gpu_buffer_.GetCVPixelBufferRef();
   }
 #else
-  const mediapipe::GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr()
-      const {
+  mediapipe::GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
     if (use_gpu_ == false) ConvertToGpu();
     return gpu_buffer_.GetGlTextureBufferSharedPtr();
   }

View File

@@ -22,9 +22,8 @@
 // For consistency, we now set MEDIAPIPE_MOBILE there too. However, for the sake
 // of projects that may want to build MediaPipe using alternative build systems,
 // we also try to set platform-specific defines in this header if missing.
-#if !defined(MEDIAPIPE_MOBILE) && \
-    (defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) || \
-     defined(__EMSCRIPTEN__))
+#if !defined(MEDIAPIPE_MOBILE) && \
+    (defined(__ANDROID__) || defined(__EMSCRIPTEN__))
 #define MEDIAPIPE_MOBILE
 #endif
@@ -36,6 +35,11 @@
 #include "TargetConditionals.h"  // for TARGET_OS_*
 #if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX
 #define MEDIAPIPE_IOS
+#if !defined(MEDIAPIPE_MOBILE) && !TARGET_OS_OSX
+#define MEDIAPIPE_MOBILE
+#endif
 #endif
 #if !defined(MEDIAPIPE_OSX) && TARGET_OS_OSX
 #define MEDIAPIPE_OSX

View File

@@ -65,9 +65,9 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
   OptionsSyntaxUtil syntax_util;
   for (auto& node : *config->mutable_node()) {
+    FieldData node_data = options_field_util::AsFieldData(node);
     for (const std::string& option_def : node.option_value()) {
-      FieldData node_data = options_field_util::AsFieldData(node);
       std::vector<absl::string_view> tag_and_name =
           syntax_util.StrSplitTags(option_def);
       std::string graph_tag = syntax_util.OptionFieldsTag(tag_and_name[1]);
@@ -96,6 +96,7 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
       status.Update(MergeField(node_path, packet_data, &node_options));
       options_field_util::SetOptionsMessage(node_options, &node);
     }
+    node.clear_option_value();
   }
   return status;
 }

View File

@@ -137,7 +137,6 @@ TEST_F(OptionsUtilTest, CopyLiteralOptions) {
   NightLightCalculatorOptions expected_node_options;
   expected_node_options.add_num_lights(8);
   expected_node.add_node_options()->PackFrom(expected_node_options);
-  *expected_node.add_option_value() = "num_lights:options/chain_length";
   EXPECT_THAT(actual_node, EqualsProto(expected_node));

   MP_EXPECT_OK(graph.StartRun({}));

View File

@@ -656,7 +656,6 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
               chain_length: 3
             }
           }
-          option_value: "chain_length:options/chain_length"
         }
         type: "MoonSubgraph"
         graph_options {
@@ -666,5 +665,84 @@
   EXPECT_THAT(moon_subgraph, mediapipe::EqualsProto(expected_graph));
 }
// Shows ExpandSubgraphs applied twice. "option_value" fields are evaluated
// and removed on the first ExpandSubgraphs call. If "option_value" fields
// are not removed during ExpandSubgraphs, they evaluate incorrectly on the
// second ExpandSubgraphs call and this test fails on "expected_node_options".
TEST(SubgraphExpansionTest, SimpleSubgraphOptionsTwice) {
GraphRegistry graph_registry;
// Register a simple-subgraph that accepts graph options.
auto moon_subgraph =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
type: "MoonSubgraph"
graph_options: {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
node: {
calculator: "MoonCalculator"
node_options: {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
option_value: "chain_length:options/chain_length"
}
)pb");
graph_registry.Register("MoonSubgraph", moon_subgraph);
// Invoke the simple-subgraph with graph options.
// The empty NodeChainSubgraphOptions below allows "option_value" fields
// on "MoonCalculator" to evaluate incorrectly, if not removed.
auto sky_graph = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
graph_options: {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
node: {
calculator: "MoonSubgraph"
options: {
[mediapipe.NodeChainSubgraphOptions.ext] {
node_type: "DoubleIntCalculator"
chain_length: 3
}
}
}
)pb");
// The first ExpandSubgraphs call evaluates and removes "option_value" fields.
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
auto expanded_1 = sky_graph;
// The second ExpandSubgraphs call has no effect on the expanded graph.
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
// Validate the expected node_options for the "MoonSubgraph".
// If the "option_value" fields are not removed during ExpandSubgraphs,
// this test fails with an incorrect value for "chain_length".
auto expected_node_options =
mediapipe::ParseTextProtoOrDie<mediapipe::NodeChainSubgraphOptions>(
"chain_length: 3");
mediapipe::NodeChainSubgraphOptions node_options;
sky_graph.node(0).node_options(0).UnpackTo(&node_options);
ASSERT_THAT(node_options, mediapipe::EqualsProto(expected_node_options));
// Validate the results from both ExpandSubgraphs() calls.
CalculatorGraphConfig expected_graph =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
graph_options {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
}
node {
name: "moonsubgraph__MoonCalculator"
calculator: "MoonCalculator"
node_options {
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {
chain_length: 3
}
}
}
)pb");
EXPECT_THAT(expanded_1, mediapipe::EqualsProto(expected_graph));
EXPECT_THAT(sky_graph, mediapipe::EqualsProto(expected_graph));
}
 }  // namespace
 }  // namespace mediapipe

View File

@ -112,13 +112,13 @@ GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
GlTexture GlCalculatorHelperImpl::CreateSourceTexture( GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const GpuBuffer& gpu_buffer, int plane) { const GpuBuffer& gpu_buffer, int plane) {
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(plane)); return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(plane));
} }
GlTexture GlCalculatorHelperImpl::CreateSourceTexture( GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
const ImageFrame& image_frame) { const ImageFrame& image_frame) {
auto gpu_buffer = GpuBuffer::CopyingImageFrame(image_frame); auto gpu_buffer = GpuBuffer::CopyingImageFrame(image_frame);
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(0)); return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(0));
} }
template <> template <>
@ -149,7 +149,7 @@ GlTexture GlCalculatorHelperImpl::CreateDestinationTexture(
GpuBuffer gpu_buffer = GpuBuffer gpu_buffer =
gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format); gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format);
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureWriteView(0)); return MapGpuBuffer(gpu_buffer, gpu_buffer.GetWriteView<GlTextureView>(0));
} }
} // namespace mediapipe } // namespace mediapipe
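With `GetGlTextureReadView`/`GetGlTextureWriteView` gone, callers go through the templated `GetReadView<GlTextureView>()`/`GetWriteView<GlTextureView>()` accessors on `GpuBuffer`, exactly as the helper does above. A minimal caller-side sketch (must run with a current GL context; include path is indicative):

```c++
#include "mediapipe/gpu/gpu_buffer.h"

// Sketch: obtain texture views through the new typed accessors. The view
// exposes the same texture name/target as the views returned by the old
// GetGlTextureReadView()/GetGlTextureWriteView() methods.
void BlitPlaneZero(const mediapipe::GpuBuffer& src, mediapipe::GpuBuffer& dst) {
  auto read_view = src.GetReadView<mediapipe::GlTextureView>(/*plane=*/0);
  auto write_view = dst.GetWriteView<mediapipe::GlTextureView>(/*plane=*/0);
  // read_view.name() / read_view.target() identify the source GL texture;
  // render into write_view's texture here, e.g. via an FBO.
  (void)read_view;
  (void)write_view;
}
```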

View File

@ -224,7 +224,8 @@ void GlTextureBuffer::WaitForConsumersOnGpu() {
// precisely, on only one GL context. // precisely, on only one GL context.
} }
GlTextureView GlTextureBuffer::GetGlTextureReadView( GlTextureView GlTextureBuffer::GetReadView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const { std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
auto gl_context = GlContext::GetCurrent(); auto gl_context = GlContext::GetCurrent();
CHECK(gl_context); CHECK(gl_context);
@ -241,7 +242,8 @@ GlTextureView GlTextureBuffer::GetGlTextureReadView(
nullptr); nullptr);
} }
GlTextureView GlTextureBuffer::GetGlTextureWriteView( GlTextureView GlTextureBuffer::GetWriteView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) { std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
auto gl_context = GlContext::GetCurrent(); auto gl_context = GlContext::GetCurrent();
CHECK(gl_context); CHECK(gl_context);
@ -341,7 +343,8 @@ std::unique_ptr<ImageFrame> GlTextureBuffer::AsImageFrame() const {
ImageFormat::Format image_format = ImageFormatForGpuBufferFormat(format()); ImageFormat::Format image_format = ImageFormatForGpuBufferFormat(format());
auto output = absl::make_unique<ImageFrame>( auto output = absl::make_unique<ImageFrame>(
image_format, width(), height(), ImageFrame::kGlDefaultAlignmentBoundary); image_format, width(), height(), ImageFrame::kGlDefaultAlignmentBoundary);
auto view = GetGlTextureReadView(nullptr, 0); auto view =
GetReadView(mediapipe::internal::types<GlTextureView>{}, nullptr, 0);
ReadTexture(view, format(), output->MutablePixelData(), ReadTexture(view, format(), output->MutablePixelData(),
output->PixelDataSize()); output->PixelDataSize());
return output; return output;

View File

@ -32,7 +32,9 @@ namespace mediapipe {
class GlCalculatorHelperImpl; class GlCalculatorHelperImpl;
// Implements a GPU memory buffer as an OpenGL texture. For internal use. // Implements a GPU memory buffer as an OpenGL texture. For internal use.
class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage { class GlTextureBuffer
: public mediapipe::internal::GpuBufferStorageImpl<
GlTextureBuffer, mediapipe::internal::ViewProvider<GlTextureView>> {
public: public:
// This is called when the texture buffer is deleted. It is passed a sync // This is called when the texture buffer is deleted. It is passed a sync
// token created at that time on the GlContext. If the GlTextureBuffer has // token created at that time on the GlContext. If the GlTextureBuffer has
@ -86,11 +88,12 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
int height() const { return height_; } int height() const { return height_; }
GpuBufferFormat format() const { return format_; } GpuBufferFormat format() const { return format_; }
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer, GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
int plane) const override; std::shared_ptr<GpuBuffer> gpu_buffer,
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const override;
int plane) override; GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
void ViewDoneWriting(const GlTextureView& view) override; std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override;
std::unique_ptr<ImageFrame> AsImageFrame() const override; std::unique_ptr<ImageFrame> AsImageFrame() const override;
// If this texture is going to be used outside of the context that produced // If this texture is going to be used outside of the context that produced
@ -142,6 +145,8 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
// Returns true on success. // Returns true on success.
bool CreateInternal(const void* data, int alignment = 4); bool CreateInternal(const void* data, int alignment = 4);
void ViewDoneWriting(const GlTextureView& view);
friend class GlCalculatorHelperImpl; friend class GlCalculatorHelperImpl;
GLuint name_ = 0; GLuint name_ = 0;
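`GlTextureBuffer` now inherits from `GpuBufferStorageImpl`, which registers the concrete type for `down_cast` and pulls in the `ViewProvider` interfaces the storage serves. A hypothetical storage following the same pattern is sketched below; class name, members, and include paths are illustrative, and the view methods return empty views to keep the sketch compilable, as the placeholder storage in `gpu_buffer.h` previously did.

```c++
#include <memory>

#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/gpu/gpu_buffer.h"  // GpuBuffer, GlTextureView (header may differ by version)
#include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/gpu_buffer_storage.h"

namespace example {

// Hypothetical storage that keeps pixels elsewhere but can still serve
// GlTextureView requests (the actual upload/allocation is elided).
class MyStorage
    : public mediapipe::internal::GpuBufferStorageImpl<
          MyStorage, mediapipe::internal::ViewProvider<mediapipe::GlTextureView>> {
 public:
  MyStorage(int width, int height, mediapipe::GpuBufferFormat format)
      : width_(width), height_(height), format_(format) {}

  int width() const override { return width_; }
  int height() const override { return height_; }
  mediapipe::GpuBufferFormat format() const override { return format_; }
  std::unique_ptr<mediapipe::ImageFrame> AsImageFrame() const override {
    return nullptr;  // conversion elided in this sketch
  }

  // ViewProvider<GlTextureView>: the types<> tag selects this overload set.
  mediapipe::GlTextureView GetReadView(
      mediapipe::internal::types<mediapipe::GlTextureView>,
      std::shared_ptr<mediapipe::GpuBuffer> gpu_buffer, int plane) const override {
    return {};  // wrap an uploaded texture here
  }
  mediapipe::GlTextureView GetWriteView(
      mediapipe::internal::types<mediapipe::GlTextureView>,
      std::shared_ptr<mediapipe::GpuBuffer> gpu_buffer, int plane) override {
    return {};  // allocate a texture to render into here
  }

 private:
  int width_;
  int height_;
  mediapipe::GpuBufferFormat format_;
};

}  // namespace example
```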

View File

@ -15,6 +15,7 @@
#ifndef MEDIAPIPE_GPU_GPU_BUFFER_H_ #ifndef MEDIAPIPE_GPU_GPU_BUFFER_H_
#define MEDIAPIPE_GPU_GPU_BUFFER_H_ #define MEDIAPIPE_GPU_GPU_BUFFER_H_
#include <memory>
#include <utility> #include <utility>
#include "mediapipe/framework/formats/image_frame.h" #include "mediapipe/framework/formats/image_frame.h"
@ -23,6 +24,10 @@
#include "mediapipe/gpu/gpu_buffer_format.h" #include "mediapipe/gpu/gpu_buffer_format.h"
#include "mediapipe/gpu/gpu_buffer_storage.h" #include "mediapipe/gpu/gpu_buffer_storage.h"
// Note: these headers are needed for the legacy storage APIs. Do not add more
// storage-specific headers here. See WebGpuTextureBuffer/View for an example
// of adding a new storage and view.
#if defined(__APPLE__) #if defined(__APPLE__)
#include <CoreVideo/CoreVideo.h> #include <CoreVideo/CoreVideo.h>
@ -31,9 +36,7 @@
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#include "mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.h" #include "mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.h"
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #else
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
#include "mediapipe/gpu/gl_texture_buffer.h" #include "mediapipe/gpu/gl_texture_buffer.h"
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
@ -60,19 +63,28 @@ class GpuBuffer {
// are not portable. Applications and calculators should normally obtain // are not portable. Applications and calculators should normally obtain
// GpuBuffers in a portable way from the framework, e.g. using // GpuBuffers in a portable way from the framework, e.g. using
// GpuBufferMultiPool. // GpuBufferMultiPool.
explicit GpuBuffer(
std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage)
: storage_(std::move(storage)) {}
// Note: these constructors and accessors for specific storage types exist
// for backwards compatibility reasons. Do not add new ones.
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
explicit GpuBuffer(CFHolder<CVPixelBufferRef> pixel_buffer) explicit GpuBuffer(CFHolder<CVPixelBufferRef> pixel_buffer)
: pixel_buffer_(std::move(pixel_buffer)) {} : storage_(std::make_shared<GpuBufferStorageCvPixelBuffer>(
std::move(pixel_buffer))) {}
explicit GpuBuffer(CVPixelBufferRef pixel_buffer) explicit GpuBuffer(CVPixelBufferRef pixel_buffer)
: pixel_buffer_(pixel_buffer) {} : storage_(
std::make_shared<GpuBufferStorageCvPixelBuffer>(pixel_buffer)) {}
CVPixelBufferRef GetCVPixelBufferRef() const { return *pixel_buffer_; } CVPixelBufferRef GetCVPixelBufferRef() const {
auto p = storage_->down_cast<GpuBufferStorageCvPixelBuffer>();
if (p) return **p;
return nullptr;
}
#else #else
explicit GpuBuffer(GlTextureBufferSharedPtr texture_buffer) GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
: texture_buffer_(std::move(texture_buffer)) {} return internal_storage<GlTextureBuffer>();
const GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr() const {
return texture_buffer_;
} }
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER #endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
@ -93,14 +105,26 @@ class GpuBuffer {
// Allow assignment from nullptr. // Allow assignment from nullptr.
GpuBuffer& operator=(std::nullptr_t other); GpuBuffer& operator=(std::nullptr_t other);
GlTextureView GetGlTextureReadView(int plane) const { // Gets a read view of the specified type. The arguments depend on the
return current_storage().GetGlTextureReadView( // specific view type; see the corresponding ViewProvider.
std::make_shared<GpuBuffer>(*this), plane); template <class View, class... Args>
auto GetReadView(Args... args) const {
return current_storage()
.down_cast<mediapipe::internal::ViewProvider<View>>()
->GetReadView(mediapipe::internal::types<View>{},
std::make_shared<GpuBuffer>(*this),
std::forward<Args>(args)...);
} }
GlTextureView GetGlTextureWriteView(int plane) { // Gets a write view of the specified type. The arguments depend on the
return current_storage().GetGlTextureWriteView( // specific view type; see the corresponding ViewProvider.
std::make_shared<GpuBuffer>(*this), plane); template <class View, class... Args>
auto GetWriteView(Args... args) {
return current_storage()
.down_cast<mediapipe::internal::ViewProvider<View>>()
->GetWriteView(mediapipe::internal::types<View>{},
std::make_shared<GpuBuffer>(*this),
std::forward<Args>(args)...);
} }
// Make a GpuBuffer copying the data from an ImageFrame. // Make a GpuBuffer copying the data from an ImageFrame.
@ -115,77 +139,57 @@ class GpuBuffer {
return current_storage().AsImageFrame(); return current_storage().AsImageFrame();
} }
// Attempts to access an underlying storage object of the specified type.
// This method is meant for internal use: user code should access the contents
// using views.
template <class T>
std::shared_ptr<T> internal_storage() const {
if (storage_->down_cast<T>()) return std::static_pointer_cast<T>(storage_);
return nullptr;
}
private: private:
class PlaceholderGpuBufferStorage class PlaceholderGpuBufferStorage
: public mediapipe::internal::GpuBufferStorage { : public mediapipe::internal::GpuBufferStorageImpl<
PlaceholderGpuBufferStorage> {
public: public:
int width() const override { return 0; } int width() const override { return 0; }
int height() const override { return 0; } int height() const override { return 0; }
virtual GpuBufferFormat format() const override { virtual GpuBufferFormat format() const override {
return GpuBufferFormat::kUnknown; return GpuBufferFormat::kUnknown;
} }
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const override {
return {};
}
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override {
return {};
}
void ViewDoneWriting(const GlTextureView& view) override{};
std::unique_ptr<ImageFrame> AsImageFrame() const override { std::unique_ptr<ImageFrame> AsImageFrame() const override {
return nullptr; return nullptr;
} }
}; };
mediapipe::internal::GpuBufferStorage& no_storage() const { std::shared_ptr<mediapipe::internal::GpuBufferStorage>& no_storage() const {
static PlaceholderGpuBufferStorage placeholder; static auto placeholder =
std::static_pointer_cast<mediapipe::internal::GpuBufferStorage>(
std::make_shared<PlaceholderGpuBufferStorage>());
return placeholder; return placeholder;
} }
const mediapipe::internal::GpuBufferStorage& current_storage() const { const mediapipe::internal::GpuBufferStorage& current_storage() const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER return *storage_;
if (pixel_buffer_ != nullptr) return pixel_buffer_;
#else
if (texture_buffer_) return *texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return no_storage();
} }
mediapipe::internal::GpuBufferStorage& current_storage() { mediapipe::internal::GpuBufferStorage& current_storage() { return *storage_; }
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
if (pixel_buffer_ != nullptr) return pixel_buffer_;
#else
if (texture_buffer_) return *texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return no_storage();
}
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage_ =
GpuBufferStorageCvPixelBuffer pixel_buffer_; no_storage();
#else
GlTextureBufferSharedPtr texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
}; };
inline bool GpuBuffer::operator==(std::nullptr_t other) const { inline bool GpuBuffer::operator==(std::nullptr_t other) const {
return &current_storage() == &no_storage(); return storage_ == no_storage();
} }
inline bool GpuBuffer::operator==(const GpuBuffer& other) const { inline bool GpuBuffer::operator==(const GpuBuffer& other) const {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER return storage_ == other.storage_;
return pixel_buffer_ == other.pixel_buffer_;
#else
return texture_buffer_ == other.texture_buffer_;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
} }
inline GpuBuffer& GpuBuffer::operator=(std::nullptr_t other) { inline GpuBuffer& GpuBuffer::operator=(std::nullptr_t other) {
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER storage_ = no_storage();
pixel_buffer_.reset(other);
#else
texture_buffer_ = other;
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
return *this; return *this;
} }
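After this change `GpuBuffer` is a thin wrapper around a single type-erased `std::shared_ptr<GpuBufferStorage>`: construction takes any storage, metadata and views delegate to it, and null/equality checks compare the storage pointer against a shared placeholder. A short usage sketch follows (the storage source is left abstract; `GlTextureBuffer` is only available in non-CVPixelBuffer builds):

```c++
#include <memory>

#include "mediapipe/gpu/gl_texture_buffer.h"  // non-Apple / non-CVPixelBuffer builds
#include "mediapipe/gpu/gpu_buffer.h"

// Sketch: any GpuBufferStorage implementation can back a GpuBuffer.
void UseStorageBackedBuffer(
    std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage) {
  mediapipe::GpuBuffer buffer(storage);  // generic constructor added above
  int width = buffer.width();            // metadata delegates to the storage
  int height = buffer.height();

  // Typed access for code that genuinely needs the concrete storage;
  // returns nullptr if the buffer is backed by something else.
  if (auto gl_storage = buffer.internal_storage<mediapipe::GlTextureBuffer>()) {
    // ... GL-specific path ...
  }

  // A default-constructed GpuBuffer holds the placeholder storage and
  // compares equal to nullptr.
  mediapipe::GpuBuffer empty;
  bool is_null = (empty == nullptr);  // true
  (void)width; (void)height; (void)is_null;
}
```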

View File

@ -12,27 +12,73 @@ class GpuBuffer;
namespace mediapipe { namespace mediapipe {
namespace internal { namespace internal {
using mediapipe::GlTextureView; template <class... T>
using mediapipe::GpuBuffer; struct types {};
using mediapipe::GpuBufferFormat;
class GlTextureViewManager { template <class V>
class ViewProvider;
// Note: this specialization temporarily lives here for backwards compatibility
// reasons. New specializations should be put in the same file as their view.
template <>
class ViewProvider<GlTextureView> {
public: public:
virtual ~GlTextureViewManager() = default; virtual ~ViewProvider() = default;
virtual GlTextureView GetGlTextureReadView( // Note that the view type is encoded in an argument to allow overloading,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const = 0; // so a storage class can implement GetRead/WriteView for multiple view types.
virtual GlTextureView GetGlTextureWriteView( // We cannot use a template function because it cannot be virtual; we want to
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) = 0; // have a virtual function here to enforce that different storages supporting
virtual void ViewDoneWriting(const GlTextureView& view) = 0; // the same view implement the same signature.
// Note that we allow different views to have custom signatures, providing
// additional view-specific arguments that may be needed.
virtual GlTextureView GetReadView(types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) const = 0;
virtual GlTextureView GetWriteView(types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) = 0;
}; };
class GpuBufferStorage : public GlTextureViewManager { class GpuBufferStorage {
public: public:
virtual ~GpuBufferStorage() = default; virtual ~GpuBufferStorage() = default;
virtual int width() const = 0; virtual int width() const = 0;
virtual int height() const = 0; virtual int height() const = 0;
virtual GpuBufferFormat format() const = 0; virtual GpuBufferFormat format() const = 0;
virtual std::unique_ptr<ImageFrame> AsImageFrame() const = 0; virtual std::unique_ptr<ImageFrame> AsImageFrame() const = 0;
// We can't use dynamic_cast since we want to support building without RTTI.
// The public methods delegate to the type-erased private virtual method.
template <class T>
T* down_cast() {
return static_cast<T*>(
const_cast<void*>(down_cast(tool::GetTypeHash<T>())));
}
template <class T>
const T* down_cast() const {
return static_cast<const T*>(down_cast(tool::GetTypeHash<T>()));
}
private:
virtual const void* down_cast(size_t type_hash) const = 0;
virtual size_t storage_type_hash() const = 0;
};
template <class T, class... U>
class GpuBufferStorageImpl : public GpuBufferStorage, public U... {
private:
virtual const void* down_cast(size_t type_hash) const override {
return down_cast_impl(type_hash, types<T, U...>{});
}
size_t storage_type_hash() const override { return tool::GetTypeHash<T>(); }
const void* down_cast_impl(size_t type_hash, types<>) const {
return nullptr;
}
template <class V, class... W>
const void* down_cast_impl(size_t type_hash, types<V, W...>) const {
if (type_hash == tool::GetTypeHash<V>()) return static_cast<const V*>(this);
return down_cast_impl(type_hash, types<W...>{});
}
}; };
} // namespace internal } // namespace internal
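`GpuBufferStorageImpl` implements `down_cast` without RTTI: the public template compares a per-type hash against each type listed in the template parameters and returns `this` cast to the first match. A self-contained sketch of the same pattern outside MediaPipe, with a hypothetical `TypeId()` standing in for `tool::GetTypeHash<T>()`:

```c++
#include <cstddef>
#include <iostream>

// Stand-in for tool::GetTypeHash<T>(): one unique value per type, no RTTI.
template <class T>
std::size_t TypeId() {
  static const char tag = 0;
  return reinterpret_cast<std::size_t>(&tag);
}

template <class... T>
struct types {};

class Storage {
 public:
  virtual ~Storage() = default;
  // Public typed cast; delegates to the type-erased virtual below.
  template <class T>
  T* down_cast() {
    return static_cast<T*>(const_cast<void*>(down_cast(TypeId<T>())));
  }

 private:
  virtual const void* down_cast(std::size_t type_id) const = 0;
};

// Lists the concrete type plus every interface it should be castable to.
template <class Concrete, class... Interfaces>
class StorageImpl : public Storage, public Interfaces... {
 private:
  const void* down_cast(std::size_t type_id) const override {
    return Match(type_id, types<Concrete, Interfaces...>{});
  }
  const void* Match(std::size_t, types<>) const { return nullptr; }
  template <class V, class... W>
  const void* Match(std::size_t type_id, types<V, W...>) const {
    if (type_id == TypeId<V>()) return static_cast<const V*>(this);
    return Match(type_id, types<W...>{});
  }
};

struct Readable {
  virtual ~Readable() = default;
  virtual int Read() const = 0;
};

class MyStorage : public StorageImpl<MyStorage, Readable> {
 public:
  int Read() const override { return 42; }
};

int main() {
  MyStorage storage;
  Storage* base = &storage;
  if (Readable* r = base->down_cast<Readable>()) std::cout << r->Read() << "\n";  // 42
  std::cout << (base->down_cast<int>() == nullptr) << "\n";                       // 1
}
```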

View File

@ -11,7 +11,8 @@ typedef CVOpenGLTextureRef CVTextureType;
typedef CVOpenGLESTextureRef CVTextureType; typedef CVOpenGLESTextureRef CVTextureType;
#endif // TARGET_OS_OSX #endif // TARGET_OS_OSX
GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView( GlTextureView GpuBufferStorageCvPixelBuffer::GetReadView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const { std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
CVReturn err; CVReturn err;
auto gl_context = GlContext::GetCurrent(); auto gl_context = GlContext::GetCurrent();
@ -58,11 +59,13 @@ GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
#endif // TARGET_OS_OSX #endif // TARGET_OS_OSX
} }
GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureWriteView( GlTextureView GpuBufferStorageCvPixelBuffer::GetWriteView(
mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) { std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
// For this storage there is currently no difference between read and write // For this storage there is currently no difference between read and write
// views, so we delegate to the read method. // views, so we delegate to the read method.
return GetGlTextureReadView(std::move(gpu_buffer), plane); return GetReadView(mediapipe::internal::types<GlTextureView>{},
std::move(gpu_buffer), plane);
} }
void GpuBufferStorageCvPixelBuffer::ViewDoneWriting(const GlTextureView& view) { void GpuBufferStorageCvPixelBuffer::ViewDoneWriting(const GlTextureView& view) {

View File

@ -12,7 +12,9 @@ namespace mediapipe {
class GlContext; class GlContext;
class GpuBufferStorageCvPixelBuffer class GpuBufferStorageCvPixelBuffer
: public mediapipe::internal::GpuBufferStorage, : public mediapipe::internal::GpuBufferStorageImpl<
GpuBufferStorageCvPixelBuffer,
mediapipe::internal::ViewProvider<GlTextureView>>,
public CFHolder<CVPixelBufferRef> { public CFHolder<CVPixelBufferRef> {
public: public:
using CFHolder<CVPixelBufferRef>::CFHolder; using CFHolder<CVPixelBufferRef>::CFHolder;
@ -28,12 +30,16 @@ class GpuBufferStorageCvPixelBuffer
return GpuBufferFormatForCVPixelFormat( return GpuBufferFormatForCVPixelFormat(
CVPixelBufferGetPixelFormatType(**this)); CVPixelBufferGetPixelFormatType(**this));
} }
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer, GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
int plane) const override; std::shared_ptr<GpuBuffer> gpu_buffer,
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const override;
int plane) override; GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
std::shared_ptr<GpuBuffer> gpu_buffer,
int plane) override;
std::unique_ptr<ImageFrame> AsImageFrame() const override; std::unique_ptr<ImageFrame> AsImageFrame() const override;
void ViewDoneWriting(const GlTextureView& view) override;
private:
void ViewDoneWriting(const GlTextureView& view);
}; };
} // namespace mediapipe } // namespace mediapipe

View File

@ -8,6 +8,9 @@ input_stream: "input_video"
# Max number of hands to detect/process. (int) # Max number of hands to detect/process. (int)
input_side_packet: "num_hands" input_side_packet: "num_hands"
# Model complexity (0 or 1). (int)
input_side_packet: "model_complexity"
# GPU image. (GpuBuffer) # GPU image. (GpuBuffer)
output_stream: "output_video" output_stream: "output_video"
# Collection of detected/predicted hands, each represented as a list of # Collection of detected/predicted hands, each represented as a list of
@ -39,6 +42,7 @@ node {
node { node {
calculator: "HandLandmarkTrackingGpu" calculator: "HandLandmarkTrackingGpu"
input_stream: "IMAGE:throttled_input_video" input_stream: "IMAGE:throttled_input_video"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "NUM_HANDS:num_hands" input_side_packet: "NUM_HANDS:num_hands"
output_stream: "LANDMARKS:hand_landmarks" output_stream: "LANDMARKS:hand_landmarks"
output_stream: "HANDEDNESS:handedness" output_stream: "HANDEDNESS:handedness"
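The demo graph now accepts an optional `model_complexity` side packet and forwards it into `HandLandmarkTrackingGpu`. A sketch of supplying it (together with `num_hands`) when driving the graph from C++; per the graph comments, higher complexity raises accuracy and latency, and 1 is the default when unspecified:

```c++
#include <map>
#include <string>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"

// Sketch: initialize the hand-tracking graph and pass its input side packets.
absl::Status RunHandTracking(const mediapipe::CalculatorGraphConfig& config) {
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));

  const std::map<std::string, mediapipe::Packet> side_packets = {
      {"num_hands", mediapipe::MakePacket<int>(2)},
      {"model_complexity", mediapipe::MakePacket<int>(1)},
  };
  MP_RETURN_IF_ERROR(graph.StartRun(side_packets));
  // ... send "input_video" packets, then CloseAllInputStreams() and
  // WaitUntilDone() ...
  return absl::OkStatus();
}
```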

View File

@ -370,6 +370,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
jmethodID processMethod = env->GetMethodID( jmethodID processMethod = env->GetMethodID(
callback_cls, process_method_name.c_str(), "(Ljava/util/List;)V"); callback_cls, process_method_name.c_str(), "(Ljava/util/List;)V");
// TODO: move to register natives.
jclass list_cls = env->FindClass("java/util/ArrayList"); jclass list_cls = env->FindClass("java/util/ArrayList");
jobject java_list = jobject java_list =
env->NewObject(list_cls, env->GetMethodID(list_cls, "<init>", "()V")); env->NewObject(list_cls, env->GetMethodID(list_cls, "<init>", "()V"));
@ -392,6 +393,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
RemovePacket(packet_handle); RemovePacket(packet_handle);
} }
env->DeleteLocalRef(callback_cls); env->DeleteLocalRef(callback_cls);
env->DeleteLocalRef(list_cls);
env->DeleteLocalRef(java_list); env->DeleteLocalRef(java_list);
VLOG(2) << "Returned from java callback."; VLOG(2) << "Returned from java callback.";
} }
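This and the following JNI changes all follow one rule: every `FindClass`/`NewObject` local reference that is no longer needed gets a matching `DeleteLocalRef`, since local references otherwise pile up until the native call returns and can overflow the local-reference table in long-running callbacks. A generic sketch of the pattern (class and method names are placeholders):

```c++
#include <jni.h>

// Sketch: look up a class, use it, and drop the class reference promptly.
// The returned object is still a local reference owned by the caller.
jobject MakeArrayList(JNIEnv* env) {
  jclass list_cls = env->FindClass("java/util/ArrayList");
  if (list_cls == nullptr) return nullptr;  // ClassNotFoundException is pending
  jmethodID ctor = env->GetMethodID(list_cls, "<init>", "()V");
  jobject java_list = env->NewObject(list_cls, ctor);
  env->DeleteLocalRef(list_cls);  // class ref no longer needed
  return java_list;
}
```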

View File

@ -56,8 +56,11 @@ JNIEXPORT jobjectArray JNICALL GRAPH_PROFILER_METHOD(
return nullptr; return nullptr;
} }
// TODO: move to register natives.
jclass byte_array_cls = env->FindClass("[B");
jobjectArray profiles = jobjectArray profiles =
env->NewObjectArray(num_profiles, env->FindClass("[B"), nullptr); env->NewObjectArray(num_profiles, byte_array_cls, nullptr);
env->DeleteLocalRef(byte_array_cls);
for (int i = 0; i < num_profiles; i++) { for (int i = 0; i < num_profiles; i++) {
const auto& profile = profiles_vec[i]; const auto& profile = profiles_vec[i];
int size = profile.ByteSize(); int size = profile.ByteSize();

View File

@ -143,8 +143,10 @@ jthrowable CreateMediaPipeException(JNIEnv* env, absl::Status status) {
env->SetByteArrayRegion(message_bytes, 0, length, env->SetByteArrayRegion(message_bytes, 0, length,
reinterpret_cast<jbyte*>(const_cast<char*>( reinterpret_cast<jbyte*>(const_cast<char*>(
std::string(status.message()).c_str()))); std::string(status.message()).c_str())));
return reinterpret_cast<jthrowable>( jthrowable result = reinterpret_cast<jthrowable>(
env->NewObject(status_cls, status_ctr, status.code(), message_bytes)); env->NewObject(status_cls, status_ctr, status.code(), message_bytes));
env->DeleteLocalRef(status_cls);
return result;
} }
bool ThrowIfError(JNIEnv* env, absl::Status status) { bool ThrowIfError(JNIEnv* env, absl::Status status) {
@ -165,11 +167,11 @@ SerializedMessageIds::SerializedMessageIds(JNIEnv* env, jobject data) {
class_registry.GetFieldName(serialized_message, "typeName"); class_registry.GetFieldName(serialized_message, "typeName");
std::string value_obfuscated = std::string value_obfuscated =
class_registry.GetFieldName(serialized_message, "value"); class_registry.GetFieldName(serialized_message, "value");
jclass j_class = reinterpret_cast<jclass>( jclass j_class = env->FindClass(serialized_message_obfuscated.c_str());
env->NewGlobalRef(env->FindClass(serialized_message_obfuscated.c_str())));
type_name_id = env->GetFieldID(j_class, type_name_obfuscated.c_str(), type_name_id = env->GetFieldID(j_class, type_name_obfuscated.c_str(),
"Ljava/lang/String;"); "Ljava/lang/String;");
value_id = env->GetFieldID(j_class, value_obfuscated.c_str(), "[B"); value_id = env->GetFieldID(j_class, value_obfuscated.c_str(), "[B");
env->DeleteLocalRef(j_class);
} }
} // namespace android } // namespace android

View File

@ -184,8 +184,11 @@ JNIEXPORT jobjectArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoVector)(
} }
const std::vector<const ::mediapipe::proto_ns::MessageLite*>& proto_vector = const std::vector<const ::mediapipe::proto_ns::MessageLite*>& proto_vector =
get_proto_vector.value(); get_proto_vector.value();
// TODO: move to register natives.
jclass byte_array_cls = env->FindClass("[B");
jobjectArray proto_array = jobjectArray proto_array =
env->NewObjectArray(proto_vector.size(), env->FindClass("[B"), nullptr); env->NewObjectArray(proto_vector.size(), byte_array_cls, nullptr);
env->DeleteLocalRef(byte_array_cls);
for (int i = 0; i < proto_vector.size(); ++i) { for (int i = 0; i < proto_vector.size(); ++i) {
const ::mediapipe::proto_ns::MessageLite* proto_message = proto_vector[i]; const ::mediapipe::proto_ns::MessageLite* proto_message = proto_vector[i];

View File

@ -137,6 +137,7 @@ void RegisterGraphNatives(JNIEnv *env) {
AddJNINativeMethod(&graph_methods, graph, "nativeGetProfiler", "(J)J", AddJNINativeMethod(&graph_methods, graph, "nativeGetProfiler", "(J)J",
(void *)&GRAPH_METHOD(nativeGetProfiler)); (void *)&GRAPH_METHOD(nativeGetProfiler));
RegisterNativesVector(env, graph_class, graph_methods); RegisterNativesVector(env, graph_class, graph_methods);
env->DeleteLocalRef(graph_class);
} }
void RegisterGraphProfilerNatives(JNIEnv *env) { void RegisterGraphProfilerNatives(JNIEnv *env) {
@ -151,6 +152,7 @@ void RegisterGraphProfilerNatives(JNIEnv *env) {
&graph_profiler_methods, graph_profiler, "nativeGetCalculatorProfiles", &graph_profiler_methods, graph_profiler, "nativeGetCalculatorProfiles",
"(J)[[B", (void *)&GRAPH_PROFILER_METHOD(nativeGetCalculatorProfiles)); "(J)[[B", (void *)&GRAPH_PROFILER_METHOD(nativeGetCalculatorProfiles));
RegisterNativesVector(env, graph_profiler_class, graph_profiler_methods); RegisterNativesVector(env, graph_profiler_class, graph_profiler_methods);
env->DeleteLocalRef(graph_profiler_class);
} }
void RegisterAndroidAssetUtilNatives(JNIEnv *env) { void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
@ -171,6 +173,7 @@ void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
(void *)&ANDROID_ASSET_UTIL_METHOD(nativeInitializeAssetManager)); (void *)&ANDROID_ASSET_UTIL_METHOD(nativeInitializeAssetManager));
RegisterNativesVector(env, android_asset_util_class, RegisterNativesVector(env, android_asset_util_class,
android_asset_util_methods); android_asset_util_methods);
env->DeleteLocalRef(android_asset_util_class);
#endif #endif
} }
@ -191,6 +194,7 @@ void RegisterAndroidPacketCreatorNatives(JNIEnv *env) {
(void *)&ANDROID_PACKET_CREATOR_METHOD(nativeCreateRgbImageFrame)); (void *)&ANDROID_PACKET_CREATOR_METHOD(nativeCreateRgbImageFrame));
RegisterNativesVector(env, android_packet_creator_class, RegisterNativesVector(env, android_packet_creator_class,
android_packet_creator_methods); android_packet_creator_methods);
env->DeleteLocalRef(android_packet_creator_class);
#endif #endif
} }
@ -232,6 +236,7 @@ void RegisterPacketCreatorNatives(JNIEnv *env) {
"(JL" + serialized_message_name + ";)J", "(JL" + serialized_message_name + ";)J",
(void *)&PACKET_CREATOR_METHOD(nativeCreateProto)); (void *)&PACKET_CREATOR_METHOD(nativeCreateProto));
RegisterNativesVector(env, packet_creator_class, packet_creator_methods); RegisterNativesVector(env, packet_creator_class, packet_creator_methods);
env->DeleteLocalRef(packet_creator_class);
} }
void RegisterPacketGetterNatives(JNIEnv *env) { void RegisterPacketGetterNatives(JNIEnv *env) {
@ -260,6 +265,7 @@ void RegisterPacketGetterNatives(JNIEnv *env) {
"nativeGetFloat32Vector", "(J)[F", "nativeGetFloat32Vector", "(J)[F",
(void *)&PACKET_GETTER_METHOD(nativeGetFloat32Vector)); (void *)&PACKET_GETTER_METHOD(nativeGetFloat32Vector));
RegisterNativesVector(env, packet_getter_class, packet_getter_methods); RegisterNativesVector(env, packet_getter_class, packet_getter_methods);
env->DeleteLocalRef(packet_getter_class);
} }
void RegisterPacketNatives(JNIEnv *env) { void RegisterPacketNatives(JNIEnv *env) {
@ -278,6 +284,7 @@ void RegisterPacketNatives(JNIEnv *env) {
AddJNINativeMethod(&packet_methods, packet, "nativeIsEmpty", "(J)Z", AddJNINativeMethod(&packet_methods, packet, "nativeIsEmpty", "(J)Z",
(void *)&PACKET_METHOD(nativeIsEmpty)); (void *)&PACKET_METHOD(nativeIsEmpty));
RegisterNativesVector(env, packet_class, packet_methods); RegisterNativesVector(env, packet_class, packet_methods);
env->DeleteLocalRef(packet_class);
} }
void RegisterCompatNatives(JNIEnv *env) { void RegisterCompatNatives(JNIEnv *env) {
@ -293,6 +300,7 @@ void RegisterCompatNatives(JNIEnv *env) {
"(I)J", "(I)J",
(void *)&COMPAT_METHOD(getCurrentNativeEGLSurface)); (void *)&COMPAT_METHOD(getCurrentNativeEGLSurface));
RegisterNativesVector(env, compat_class, compat_methods); RegisterNativesVector(env, compat_class, compat_methods);
env->DeleteLocalRef(compat_class);
} }
} // namespace } // namespace

View File

@ -95,13 +95,12 @@ public class ImageSolutionResult implements SolutionResult {
} }
} }
// Releases image packet and the underlying data. // Clears the underlying image packets to prevent the callers from accessing the invalid packets
void releaseImagePackets() { // outside of the output callback method.
imagePacket.release(); void clearImagePackets() {
imagePacket = null;
if (imageResultPackets != null) { if (imageResultPackets != null) {
for (Packet p : imageResultPackets) { imageResultPackets.clear();
p.release();
}
} }
} }
} }

View File

@ -90,12 +90,9 @@ public class OutputHandler<T extends SolutionResult> {
Log.e(TAG, "Error occurs when getting MediaPipe solution result. " + e); Log.e(TAG, "Error occurs when getting MediaPipe solution result. " + e);
} }
} finally { } finally {
for (Packet packet : packets) {
packet.release();
}
if (solutionResult instanceof ImageSolutionResult) { if (solutionResult instanceof ImageSolutionResult) {
ImageSolutionResult imageSolutionResult = (ImageSolutionResult) solutionResult; ImageSolutionResult imageSolutionResult = (ImageSolutionResult) solutionResult;
imageSolutionResult.releaseImagePackets(); imageSolutionResult.clearImagePackets();
} }
} }
} }

View File

@ -34,7 +34,6 @@ android_library(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
"//mediapipe/framework/formats:detection_java_proto_lite", "//mediapipe/framework/formats:detection_java_proto_lite",
"//mediapipe/framework/formats:location_data_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/framework:android_framework", "//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_base", "//mediapipe/java/com/google/mediapipe/solutioncore:solution_base",
"//third_party:autovalue", "//third_party:autovalue",

View File

@ -17,7 +17,6 @@ package com.google.mediapipe.solutions.facedetection;
import android.content.Context; import android.content.Context;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.DetectionProto.Detection; import com.google.mediapipe.formats.proto.DetectionProto.Detection;
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
import com.google.mediapipe.framework.MediaPipeException; import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet; import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutioncore.ErrorListener; import com.google.mediapipe.solutioncore.ErrorListener;
@ -104,27 +103,4 @@ public class FaceDetection extends ImageSolutionBase {
this.outputHandler.setErrorListener(listener); this.outputHandler.setErrorListener(listener);
this.errorListener = listener; this.errorListener = listener;
} }
/**
* Gets a specific face keypoint by face index and face keypoint type.
*
* @param result the returned {@link FaceDetectionResult} object.
* @param faceIndex the face index. A smaller index maps to a detected face with a higher
* confidence score.
* @param faceKeypointType the face keypoint type defined in {@link FaceKeypoint}.
*/
public static RelativeKeypoint getFaceKeypoint(
FaceDetectionResult result,
int faceIndex,
@FaceKeypoint.FaceKeypointType int faceKeypointType) {
if (result == null
|| faceIndex >= result.multiFaceDetections().size()
|| faceKeypointType >= FaceKeypoint.NUM_KEY_POINTS) {
return RelativeKeypoint.getDefaultInstance();
}
Detection detection = result.multiFaceDetections().get(faceIndex);
float x = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getX();
float y = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getY();
return RelativeKeypoint.newBuilder().setX(x).setY(y).build();
}
} }

View File

@ -23,12 +23,13 @@ android_library(
"HandsResult.java", "HandsResult.java",
], ],
assets = [ assets = [
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite", "//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
"//mediapipe/modules/palm_detection:palm_detection.tflite", "//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
"//mediapipe/modules/hand_landmark:handedness.txt",
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
], ],
assets_dir = "", assets_dir = "",
javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"], javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],

View File

@ -18,9 +18,10 @@ import android.content.Context;
import com.google.auto.value.AutoValue; import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification; import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList;
import com.google.mediapipe.framework.MediaPipeException; import com.google.mediapipe.framework.MediaPipeException;
import com.google.mediapipe.framework.Packet; import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.solutioncore.ErrorListener; import com.google.mediapipe.solutioncore.ErrorListener;
@ -28,7 +29,9 @@ import com.google.mediapipe.solutioncore.ImageSolutionBase;
import com.google.mediapipe.solutioncore.OutputHandler; import com.google.mediapipe.solutioncore.OutputHandler;
import com.google.mediapipe.solutioncore.ResultListener; import com.google.mediapipe.solutioncore.ResultListener;
import com.google.mediapipe.solutioncore.SolutionInfo; import com.google.mediapipe.solutioncore.SolutionInfo;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@ -85,10 +88,15 @@ public class Hands extends ImageSolutionBase {
private static final String CPU_GRAPH_NAME = "hand_landmark_tracking_cpu_image.binarypb"; private static final String CPU_GRAPH_NAME = "hand_landmark_tracking_cpu_image.binarypb";
private static final String IMAGE_INPUT_STREAM = "image"; private static final String IMAGE_INPUT_STREAM = "image";
private static final ImmutableList<String> OUTPUT_STREAMS = private static final ImmutableList<String> OUTPUT_STREAMS =
ImmutableList.of("multi_hand_landmarks", "multi_handedness", "throttled_image"); ImmutableList.of(
"multi_hand_landmarks",
"multi_hand_world_landmarks",
"multi_handedness",
"throttled_image");
private static final int LANDMARKS_INDEX = 0; private static final int LANDMARKS_INDEX = 0;
private static final int HANDEDNESS_INDEX = 1; private static final int WORLD_LANDMARKS_INDEX = 1;
private static final int INPUT_IMAGE_INDEX = 2; private static final int HANDEDNESS_INDEX = 2;
private static final int INPUT_IMAGE_INDEX = 3;
private final OutputHandler<HandsResult> outputHandler; private final OutputHandler<HandsResult> outputHandler;
/** /**
@ -109,8 +117,18 @@ public class Hands extends ImageSolutionBase {
reportError("Error occurs while getting MediaPipe hand landmarks.", e); reportError("Error occurs while getting MediaPipe hand landmarks.", e);
} }
try { try {
handsResultBuilder.setMultiHandedness( handsResultBuilder.setMultiHandWorldLandmarks(
getProtoVector(packets.get(HANDEDNESS_INDEX), Classification.parser())); getProtoVector(packets.get(WORLD_LANDMARKS_INDEX), LandmarkList.parser()));
} catch (MediaPipeException e) {
reportError("Error occurs while getting MediaPipe hand world landmarks.", e);
}
try {
List<Classification> handednessList = new ArrayList<>();
for (ClassificationList protolist :
getProtoVector(packets.get(HANDEDNESS_INDEX), ClassificationList.parser())) {
handednessList.add(protolist.getClassification(0));
}
handsResultBuilder.setMultiHandedness(handednessList);
} catch (MediaPipeException e) { } catch (MediaPipeException e) {
reportError("Error occurs while getting MediaPipe handedness data.", e); reportError("Error occurs while getting MediaPipe handedness data.", e);
} }
@ -155,21 +173,4 @@ public class Hands extends ImageSolutionBase {
this.outputHandler.setErrorListener(listener); this.outputHandler.setErrorListener(listener);
this.errorListener = listener; this.errorListener = listener;
} }
/**
* Gets a specific hand landmark by hand index and hand landmark type.
*
* @param result the returned {@link HandsResult} object.
* @param handIndex the hand index. The hand landmark lists are sorted by the confidence score.
* @param landmarkType the hand landmark type defined in {@link HandLandmark}.
*/
public static NormalizedLandmark getHandLandmark(
HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
if (result == null
|| handIndex >= result.multiHandLandmarks().size()
|| landmarkType >= HandLandmark.NUM_LANDMARKS) {
return NormalizedLandmark.getDefaultInstance();
}
return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
}
} }

View File

@ -17,6 +17,7 @@ package com.google.mediapipe.solutions.hands;
import android.graphics.Bitmap; import android.graphics.Bitmap;
import com.google.auto.value.AutoBuilder; import com.google.auto.value.AutoBuilder;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
import com.google.mediapipe.formats.proto.ClassificationProto.Classification; import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
import com.google.mediapipe.framework.Packet; import com.google.mediapipe.framework.Packet;
@ -31,14 +32,17 @@ import java.util.List;
*/ */
public class HandsResult extends ImageSolutionResult { public class HandsResult extends ImageSolutionResult {
private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks; private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks;
private final ImmutableList<LandmarkList> multiHandWorldLandmarks;
private final ImmutableList<Classification> multiHandedness; private final ImmutableList<Classification> multiHandedness;
HandsResult( HandsResult(
ImmutableList<NormalizedLandmarkList> multiHandLandmarks, ImmutableList<NormalizedLandmarkList> multiHandLandmarks,
ImmutableList<LandmarkList> multiHandWorldLandmarks,
ImmutableList<Classification> multiHandedness, ImmutableList<Classification> multiHandedness,
Packet imagePacket, Packet imagePacket,
long timestamp) { long timestamp) {
this.multiHandLandmarks = multiHandLandmarks; this.multiHandLandmarks = multiHandLandmarks;
this.multiHandWorldLandmarks = multiHandWorldLandmarks;
this.multiHandedness = multiHandedness; this.multiHandedness = multiHandedness;
this.timestamp = timestamp; this.timestamp = timestamp;
this.imagePacket = imagePacket; this.imagePacket = imagePacket;
@ -53,6 +57,12 @@ public class HandsResult extends ImageSolutionResult {
return multiHandLandmarks; return multiHandLandmarks;
} }
// Collection of detected/tracked hands' landmarks in real-world 3D coordinates that are in meters
// with the origin at the hand's approximate geometric center.
public ImmutableList<LandmarkList> multiHandWorldLandmarks() {
return multiHandWorldLandmarks;
}
// Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each // Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each
// hand is composed of label and score. label is a string of value either "Left" or "Right". score // hand is composed of label and score. label is a string of value either "Left" or "Right". score
// is the estimated probability of the predicted handedness and is always greater than or equal to // is the estimated probability of the predicted handedness and is always greater than or equal to
@ -70,6 +80,8 @@ public class HandsResult extends ImageSolutionResult {
public abstract static class Builder { public abstract static class Builder {
abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value); abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value);
abstract Builder setMultiHandWorldLandmarks(List<LandmarkList> value);
abstract Builder setMultiHandedness(List<Classification> value); abstract Builder setMultiHandedness(List<Classification> value);
abstract Builder setTimestamp(long value); abstract Builder setTimestamp(long value);

View File

@ -24,7 +24,6 @@ package(default_visibility = ["//visibility:public"])
exports_files([ exports_files([
"hand_landmark_full.tflite", "hand_landmark_full.tflite",
"hand_landmark_lite.tflite", "hand_landmark_lite.tflite",
"hand_landmark_sparse.tflite",
"handedness.txt", "handedness.txt",
]) ])
@ -56,6 +55,7 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator", "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator", "//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator", "//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/calculators/util:world_landmark_projection_calculator",
], ],
) )
@ -75,6 +75,7 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator", "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator", "//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator", "//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/calculators/util:world_landmark_projection_calculator",
], ],
) )

View File

@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# the absence of this packet so that they don't wait for it unnecessarily. # the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks" output_stream: "LANDMARKS:hand_landmarks"
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
# Handedness of the detected hand (i.e. is hand left or right). # Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList) # (ClassificationList)
output_stream: "HANDEDNESS:handedness" output_stream: "HANDEDNESS:handedness"
@ -77,11 +87,13 @@ node {
output_stream: "landmark_tensors" output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor" output_stream: "hand_flag_tensor"
output_stream: "handedness_tensor" output_stream: "handedness_tensor"
output_stream: "world_landmark_tensor"
options: { options: {
[mediapipe.SplitVectorCalculatorOptions.ext] { [mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 } ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 } ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 } ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
} }
} }
} }
@ -175,3 +187,33 @@ node {
input_stream: "NORM_RECT:hand_rect" input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks" output_stream: "NORM_LANDMARKS:hand_landmarks"
} }
# Drops world landmarks tensors if hand is not present.
node {
calculator: "GateCalculator"
input_stream: "world_landmark_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_world_landmark_tensor"
}
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_world_landmark_tensor"
output_stream: "LANDMARKS:unprojected_world_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
}
}
}
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "WorldLandmarkProjectionCalculator"
input_stream: "LANDMARKS:unprojected_world_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_world_landmarks"
}
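Because `WORLD_LANDMARKS` carries a `LandmarkList` in metric coordinates (origin at the ROI center) rather than normalized image coordinates, geometric quantities can be computed directly from it. A small sketch, assuming the standard 21-point hand topology where index 4 is the thumb tip and 8 the index fingertip:

```c++
#include <cmath>

#include "mediapipe/framework/formats/landmark.pb.h"

// Returns the thumb-tip-to-index-tip distance in meters from the payload of a
// hand_world_landmarks packet.
float PinchDistanceMeters(const mediapipe::LandmarkList& world_landmarks) {
  const auto& thumb = world_landmarks.landmark(4);
  const auto& index = world_landmarks.landmark(8);
  const float dx = thumb.x() - index.x();
  const float dy = thumb.y() - index.y();
  const float dz = thumb.z() - index.z();
  return std::sqrt(dx * dx + dy * dy + dz * dz);
}
```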

View File

@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# the absence of this packet so that they don't wait for it unnecessarily. # the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks" output_stream: "LANDMARKS:hand_landmarks"
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
# Handedness of the detected hand (i.e. is hand left or right). # Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList) # (ClassificationList)
output_stream: "HANDEDNESS:handedness" output_stream: "HANDEDNESS:handedness"
@ -71,11 +81,13 @@ node {
output_stream: "landmark_tensors" output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor" output_stream: "hand_flag_tensor"
output_stream: "handedness_tensor" output_stream: "handedness_tensor"
output_stream: "world_landmark_tensor"
options: { options: {
[mediapipe.SplitVectorCalculatorOptions.ext] { [mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 } ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 } ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 } ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
} }
} }
} }
@ -169,3 +181,33 @@ node {
input_stream: "NORM_RECT:hand_rect" input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks" output_stream: "NORM_LANDMARKS:hand_landmarks"
} }
# Drops world landmarks tensors if hand is not present.
node {
calculator: "GateCalculator"
input_stream: "world_landmark_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_world_landmark_tensor"
}
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_world_landmark_tensor"
output_stream: "LANDMARKS:unprojected_world_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
}
}
}
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "WorldLandmarkProjectionCalculator"
input_stream: "LANDMARKS:unprojected_world_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_world_landmarks"
}

View File

@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int) # Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands" input_side_packet: "NUM_HANDS:num_hands"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as # Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# inference latency generally go up with the model complexity. If unspecified, # well as inference latency generally go up with the model complexity. If
# functions as set to 1. (int) # unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity" input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize # Whether landmarks on the previous image should be used to help localize
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# framework will internally inform the downstream calculators of the absence of # framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily. # this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks" output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right), # Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a Classification proto. # each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored, # Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped # i.e., taken with a front-facing/selfie camera with images flipped
# horizontally. # horizontally.
@ -89,6 +103,7 @@ node {
# Detects palms. # Detects palms.
node { node {
calculator: "PalmDetectionCpu" calculator: "PalmDetectionCpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:palm_detection_image" input_stream: "IMAGE:palm_detection_image"
output_stream: "DETECTIONS:all_palm_detections" output_stream: "DETECTIONS:all_palm_detections"
} }
@ -186,12 +201,13 @@ node {
input_stream: "IMAGE:image_for_landmarks" input_stream: "IMAGE:image_for_landmarks"
input_stream: "ROI:single_hand_rect" input_stream: "ROI:single_hand_rect"
output_stream: "LANDMARKS:single_hand_landmarks" output_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
output_stream: "HANDEDNESS:single_handedness" output_stream: "HANDEDNESS:single_handedness"
} }
# Collects the handedness for each single hand into a vector. Upon # Collects the handedness for each single hand into a vector. Upon receiving the
# receiving the BATCH_END timestamp, outputs a vector of classification at the # BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
# BATCH_END timestamp. # timestamp.
node { node {
calculator: "EndLoopClassificationListCalculator" calculator: "EndLoopClassificationListCalculator"
input_stream: "ITEM:single_handedness" input_stream: "ITEM:single_handedness"
@ -218,6 +234,16 @@ node {
output_stream: "ITERABLE:multi_hand_landmarks" output_stream: "ITERABLE:multi_hand_landmarks"
} }
# Collects a set of world landmarks for each hand into a vector. Upon receiving
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_world_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_world_landmarks"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the # Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END # BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp. # timestamp.

View File

@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int) # Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands" input_side_packet: "NUM_HANDS:num_hands"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as # Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# inference latency generally go up with the model complexity. If unspecified, # well as inference latency generally go up with the model complexity. If
# functions as set to 1. (int) # unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity" input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize # Whether landmarks on the previous image should be used to help localize
@ -25,6 +25,7 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# The throttled input image. (Image) # The throttled input image. (Image)
output_stream: "IMAGE:throttled_image" output_stream: "IMAGE:throttled_image"
# Collection of detected/predicted hands, each represented as a list of # Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>) # landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this # NOTE: there will not be an output packet in the LANDMARKS stream for this
@ -32,8 +33,22 @@ output_stream: "IMAGE:throttled_image"
# framework will internally inform the downstream calculators of the absence of # framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily. # this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks" output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right), # Collection of handedness of the detected hands (i.e. is hand left or right),
-# each represented as a Classification proto.
+# each represented as a ClassificationList proto with a single Classification
+# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored, # Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped # i.e., taken with a front-facing/selfie camera with images flipped
# horizontally. # horizontally.
@ -93,6 +108,7 @@ node {
input_side_packet: "MODEL_COMPLEXITY:model_complexity" input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
output_stream: "LANDMARKS:multi_hand_landmarks" output_stream: "LANDMARKS:multi_hand_landmarks"
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
output_stream: "HANDEDNESS:multi_handedness" output_stream: "HANDEDNESS:multi_handedness"
output_stream: "PALM_DETECTIONS:palm_detections" output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"


@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int) # Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands" input_side_packet: "NUM_HANDS:num_hands"
-# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
-# inference latency generally go up with the model complexity. If unspecified,
-# functions as set to 1. (int)
+# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
+# well as inference latency generally go up with the model complexity. If
+# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity" input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize # Whether landmarks on the previous image should be used to help localize
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# framework will internally inform the downstream calculators of the absence of # framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily. # this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks" output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right), # Collection of handedness of the detected hands (i.e. is hand left or right),
-# each represented as a Classification proto.
+# each represented as a ClassificationList proto with a single Classification
+# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored, # Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped # i.e., taken with a front-facing/selfie camera with images flipped
# horizontally. # horizontally.
@ -89,6 +103,7 @@ node {
# Detects palms. # Detects palms.
node { node {
calculator: "PalmDetectionGpu" calculator: "PalmDetectionGpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:palm_detection_image" input_stream: "IMAGE:palm_detection_image"
output_stream: "DETECTIONS:all_palm_detections" output_stream: "DETECTIONS:all_palm_detections"
} }
@ -187,12 +202,13 @@ node {
input_stream: "IMAGE:image_for_landmarks" input_stream: "IMAGE:image_for_landmarks"
input_stream: "ROI:single_hand_rect" input_stream: "ROI:single_hand_rect"
output_stream: "LANDMARKS:single_hand_landmarks" output_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
output_stream: "HANDEDNESS:single_handedness" output_stream: "HANDEDNESS:single_handedness"
} }
-# Collects the handedness for each single hand into a vector. Upon
-# receiving the BATCH_END timestamp, outputs a vector of classification at the
-# BATCH_END timestamp.
+# Collects the handedness for each single hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
+# timestamp.
node { node {
calculator: "EndLoopClassificationListCalculator" calculator: "EndLoopClassificationListCalculator"
input_stream: "ITEM:single_handedness" input_stream: "ITEM:single_handedness"
@ -219,6 +235,16 @@ node {
output_stream: "ITERABLE:multi_hand_landmarks" output_stream: "ITERABLE:multi_hand_landmarks"
} }
# Collects a set of world landmarks for each hand into a vector. Upon receiving
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_world_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_world_landmarks"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the # Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END # BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp. # timestamp.


@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int) # Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands" input_side_packet: "NUM_HANDS:num_hands"
-# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
-# inference latency generally go up with the model complexity. If unspecified,
-# functions as set to 1. (int)
+# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
+# well as inference latency generally go up with the model complexity. If
+# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity" input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize # Whether landmarks on the previous image should be used to help localize
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# framework will internally inform the downstream calculators of the absence of # framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily. # this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks" output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right), # Collection of handedness of the detected hands (i.e. is hand left or right),
-# each represented as a Classification proto.
+# each represented as a ClassificationList proto with a single Classification
+# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored, # Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped # i.e., taken with a front-facing/selfie camera with images flipped
# horizontally. # horizontally.
@ -93,6 +107,7 @@ node {
input_side_packet: "MODEL_COMPLEXITY:model_complexity" input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
output_stream: "LANDMARKS:multi_hand_landmarks" output_stream: "LANDMARKS:multi_hand_landmarks"
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
output_stream: "HANDEDNESS:multi_handedness" output_stream: "HANDEDNESS:multi_handedness"
output_stream: "PALM_DETECTIONS:palm_detections" output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"


@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList) # Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
# Face landmarks. (NormalizedLandmarkList) # Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks" output_stream: "FACE_LANDMARKS:face_landmarks"
@ -72,5 +77,6 @@ node {
calculator: "FaceLandmarkCpu" calculator: "FaceLandmarkCpu"
input_stream: "IMAGE:input_video" input_stream: "IMAGE:input_video"
input_stream: "ROI:face_tracking_roi" input_stream: "ROI:face_tracking_roi"
input_side_packet: "WITH_ATTENTION:refine_landmarks"
output_stream: "LANDMARKS:face_landmarks" output_stream: "LANDMARKS:face_landmarks"
} }


@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList) # Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
# Face landmarks. (NormalizedLandmarkList) # Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks" output_stream: "FACE_LANDMARKS:face_landmarks"
@ -72,5 +77,6 @@ node {
calculator: "FaceLandmarkGpu" calculator: "FaceLandmarkGpu"
input_stream: "IMAGE:input_video" input_stream: "IMAGE:input_video"
input_stream: "ROI:face_tracking_roi" input_stream: "ROI:face_tracking_roi"
input_side_packet: "WITH_ATTENTION:refine_landmarks"
output_stream: "LANDMARKS:face_landmarks" output_stream: "LANDMARKS:face_landmarks"
} }


@ -35,6 +35,7 @@
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" # input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" # input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" # input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" # input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# output_stream: "POSE_LANDMARKS:pose_landmarks" # output_stream: "POSE_LANDMARKS:pose_landmarks"
# output_stream: "FACE_LANDMARKS:face_landmarks" # output_stream: "FACE_LANDMARKS:face_landmarks"
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# jitter. If unspecified, functions as set to true. (bool) # jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# Whether landmarks on the previous image should be used to help localize # Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool) # landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
@ -135,5 +141,6 @@ node {
calculator: "FaceLandmarksFromPoseCpu" calculator: "FaceLandmarksFromPoseCpu"
input_stream: "IMAGE:image" input_stream: "IMAGE:image"
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
output_stream: "FACE_LANDMARKS:face_landmarks" output_stream: "FACE_LANDMARKS:face_landmarks"
} }


@ -35,6 +35,7 @@
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" # input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" # input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" # input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" # input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# output_stream: "POSE_LANDMARKS:pose_landmarks" # output_stream: "POSE_LANDMARKS:pose_landmarks"
# output_stream: "FACE_LANDMARKS:face_landmarks" # output_stream: "FACE_LANDMARKS:face_landmarks"
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# jitter. If unspecified, functions as set to true. (bool) # jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
# Whether landmarks on the previous image should be used to help localize # Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool) # landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
@ -135,5 +141,6 @@ node {
calculator: "FaceLandmarksFromPoseGpu" calculator: "FaceLandmarksFromPoseGpu"
input_stream: "IMAGE:image" input_stream: "IMAGE:image"
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
output_stream: "FACE_LANDMARKS:face_landmarks" output_stream: "FACE_LANDMARKS:face_landmarks"
} }


@ -21,13 +21,29 @@ licenses(["notice"])
package(default_visibility = ["//visibility:public"]) package(default_visibility = ["//visibility:public"])
exports_files(["palm_detection.tflite"]) exports_files([
"palm_detection_lite.tflite",
"palm_detection_full.tflite",
])
mediapipe_simple_subgraph(
name = "palm_detection_model_loader",
graph = "palm_detection_model_loader.pbtxt",
register_as = "PalmDetectionModelLoader",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/tflite:tflite_model_calculator",
"//mediapipe/calculators/util:local_file_contents_calculator",
"//mediapipe/framework/tool:switch_container",
],
)
mediapipe_simple_subgraph( mediapipe_simple_subgraph(
name = "palm_detection_cpu", name = "palm_detection_cpu",
graph = "palm_detection_cpu.pbtxt", graph = "palm_detection_cpu.pbtxt",
register_as = "PalmDetectionCpu", register_as = "PalmDetectionCpu",
deps = [ deps = [
":palm_detection_model_loader",
"//mediapipe/calculators/tensor:image_to_tensor_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator", "//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator", "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
@ -43,6 +59,7 @@ mediapipe_simple_subgraph(
graph = "palm_detection_gpu.pbtxt", graph = "palm_detection_gpu.pbtxt",
register_as = "PalmDetectionGpu", register_as = "PalmDetectionGpu",
deps = [ deps = [
":palm_detection_model_loader",
"//mediapipe/calculators/tensor:image_to_tensor_calculator", "//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator", "//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator", "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
@ -52,10 +69,3 @@ mediapipe_simple_subgraph(
"//mediapipe/calculators/util:non_max_suppression_calculator", "//mediapipe/calculators/util:non_max_suppression_calculator",
], ],
) )
-exports_files(
-    srcs = [
-        "palm_detection.tflite",
-        "palm_detection_labelmap.txt",
-    ],
-)


@ -5,6 +5,11 @@ type: "PalmDetectionCpu"
# CPU image. (ImageFrame) # CPU image. (ImageFrame)
input_stream: "IMAGE:image" input_stream: "IMAGE:image"
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Detected palms. (std::vector<Detection>) # Detected palms. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this # NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of palms detected. However, the MediaPipe # particular timestamp if none of palms detected. However, the MediaPipe
@ -21,11 +26,11 @@ node {
output_stream: "LETTERBOX_PADDING:letterbox_padding" output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: { options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] { [mediapipe.ImageToTensorCalculatorOptions.ext] {
-output_tensor_width: 128
-output_tensor_height: 128
+output_tensor_width: 192
+output_tensor_height: 192
keep_aspect_ratio: true keep_aspect_ratio: true
output_tensor_float_range { output_tensor_float_range {
-min: -1.0
+min: 0.0
max: 1.0 max: 1.0
} }
border_mode: BORDER_ZERO border_mode: BORDER_ZERO
@ -39,6 +44,13 @@ node {
output_side_packet: "opresolver" output_side_packet: "opresolver"
} }
# Loads the palm detection TF Lite model.
node {
calculator: "PalmDetectionModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a # Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and # vector of tensors representing, for instance, detection boxes/keypoints and
# scores. # scores.
@ -47,9 +59,9 @@ node {
input_stream: "TENSORS:input_tensor" input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:detection_tensors" output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
input_side_packet: "MODEL:model"
options: { options: {
[mediapipe.InferenceCalculatorOptions.ext] { [mediapipe.InferenceCalculatorOptions.ext] {
-model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
delegate { xnnpack {} }
} }
} }
@ -65,8 +77,8 @@ node {
num_layers: 4 num_layers: 4
min_scale: 0.1484375 min_scale: 0.1484375
max_scale: 0.75 max_scale: 0.75
-input_size_height: 128
-input_size_width: 128
+input_size_width: 192
+input_size_height: 192
anchor_offset_x: 0.5 anchor_offset_x: 0.5
anchor_offset_y: 0.5 anchor_offset_y: 0.5
strides: 8 strides: 8
@ -90,7 +102,7 @@ node {
options: { options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] { [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1 num_classes: 1
-num_boxes: 896
+num_boxes: 2016
num_coords: 18 num_coords: 18
box_coord_offset: 0 box_coord_offset: 0
keypoint_coord_offset: 4 keypoint_coord_offset: 4
@ -100,10 +112,10 @@ node {
score_clipping_thresh: 100.0 score_clipping_thresh: 100.0
reverse_output_order: true reverse_output_order: true
-x_scale: 128.0
-y_scale: 128.0
-h_scale: 128.0
-w_scale: 128.0
+x_scale: 192.0
+y_scale: 192.0
+w_scale: 192.0
+h_scale: 192.0
min_score_thresh: 0.5 min_score_thresh: 0.5
} }
} }
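The jump from `num_boxes: 896` to `num_boxes: 2016` follows directly from the larger input tensor. A quick back-of-the-envelope check, assuming the usual palm-detection SSD anchor layout of strides 8, 16, 16, 16 with two anchors per feature-map cell (only the first `strides: 8` entry is visible in the hunk above, so the remaining strides are an assumption):

```python
# Anchor count = 2 anchors per cell, summed over the four feature maps.
def num_anchors(input_size, strides=(8, 16, 16, 16), anchors_per_cell=2):
    cells = sum((input_size // s) ** 2 for s in strides)
    return cells * anchors_per_cell

print(num_anchors(128))  # 2 * (16*16 + 3 * 8*8)   = 896
print(num_anchors(192))  # 2 * (24*24 + 3 * 12*12) = 2016
```

The same reasoning explains why the x/y/w/h scales move from 128.0 to 192.0: they must match the new input tensor size.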

Binary file not shown.


@ -5,6 +5,11 @@ type: "PalmDetectionGpu"
# GPU image. (GpuBuffer) # GPU image. (GpuBuffer)
input_stream: "IMAGE:image" input_stream: "IMAGE:image"
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Detected palms. (std::vector<Detection>) # Detected palms. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this # NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if none of palms detected. However, the MediaPipe # particular timestamp if none of palms detected. However, the MediaPipe
@ -21,11 +26,11 @@ node {
output_stream: "LETTERBOX_PADDING:letterbox_padding" output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: { options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] { [mediapipe.ImageToTensorCalculatorOptions.ext] {
-output_tensor_width: 128
-output_tensor_height: 128
+output_tensor_width: 192
+output_tensor_height: 192
keep_aspect_ratio: true keep_aspect_ratio: true
output_tensor_float_range { output_tensor_float_range {
-min: -1.0
+min: 0.0
max: 1.0 max: 1.0
} }
border_mode: BORDER_ZERO border_mode: BORDER_ZERO
@ -45,6 +50,13 @@ node {
} }
} }
# Loads the palm detection TF Lite model.
node {
calculator: "PalmDetectionModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a # Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and # vector of tensors representing, for instance, detection boxes/keypoints and
# scores. # scores.
@ -53,10 +65,10 @@ node {
input_stream: "TENSORS:input_tensor" input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:detection_tensors" output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
input_side_packet: "MODEL:model"
options: { options: {
[mediapipe.InferenceCalculatorOptions.ext] { [mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/palm_detection/palm_detection.tflite" delegate { gpu {} }
use_gpu: true
} }
} }
} }
@ -71,8 +83,8 @@ node {
num_layers: 4 num_layers: 4
min_scale: 0.1484375 min_scale: 0.1484375
max_scale: 0.75 max_scale: 0.75
-input_size_height: 128
-input_size_width: 128
+input_size_width: 192
+input_size_height: 192
anchor_offset_x: 0.5 anchor_offset_x: 0.5
anchor_offset_y: 0.5 anchor_offset_y: 0.5
strides: 8 strides: 8
@ -96,7 +108,7 @@ node {
options: { options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] { [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1 num_classes: 1
-num_boxes: 896
+num_boxes: 2016
num_coords: 18 num_coords: 18
box_coord_offset: 0 box_coord_offset: 0
keypoint_coord_offset: 4 keypoint_coord_offset: 4
@ -106,10 +118,10 @@ node {
score_clipping_thresh: 100.0 score_clipping_thresh: 100.0
reverse_output_order: true reverse_output_order: true
-x_scale: 128.0
-y_scale: 128.0
-h_scale: 128.0
-w_scale: 128.0
+x_scale: 192.0
+y_scale: 192.0
+w_scale: 192.0
+h_scale: 192.0
min_score_thresh: 0.5 min_score_thresh: 0.5
} }
} }

Binary file not shown.


@ -0,0 +1,63 @@
# MediaPipe graph to load a selected palm detection TF Lite model.
type: "PalmDetectionModelLoader"
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# TF Lite model represented as a FlatBuffer.
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
output_side_packet: "MODEL:model"
# Determines path to the desired palm detection model file.
node {
calculator: "SwitchContainer"
input_side_packet: "SELECT:model_complexity"
output_side_packet: "PACKET:model_path"
options: {
[mediapipe.SwitchContainerOptions.ext] {
select: 1
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/palm_detection/palm_detection_lite.tflite"
}
}
}
}
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/palm_detection/palm_detection_full.tflite"
}
}
}
}
}
}
}
# Loads the file in the specified path into a blob.
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:model_path"
output_side_packet: "CONTENTS:model_blob"
options: {
[mediapipe.LocalFileContentsCalculatorOptions.ext]: {
text_mode: false
}
}
}
# Converts the input blob into a TF Lite model.
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:model_blob"
output_side_packet: "MODEL:model"
}
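For readers more comfortable in Python, the control flow of this loader graph can be sketched roughly as follows. This is an illustration of the selection logic only, not MediaPipe API; the default of 1 mirrors the `select: 1` field above, and the file paths mirror the `string_value` packets.

```python
from pathlib import Path
from typing import Optional

_MODEL_PATHS = {
    0: "mediapipe/modules/palm_detection/palm_detection_lite.tflite",
    1: "mediapipe/modules/palm_detection/palm_detection_full.tflite",
}

def load_palm_detection_model_blob(model_complexity: Optional[int] = None) -> bytes:
    """Mirrors PalmDetectionModelLoader: pick a path, then read the raw blob."""
    # SwitchContainer's `select: 1` makes the "full" model the default choice.
    complexity = 1 if model_complexity is None else model_complexity
    # LocalFileContentsCalculator reads the file in binary mode (text_mode: false);
    # TfLiteModelCalculator then wraps the bytes in a tflite::FlatBufferModel.
    return Path(_MODEL_PATHS[complexity]).read_bytes()
```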


@ -127,6 +127,7 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*
- (void)videoUpdateIfNeeded { - (void)videoUpdateIfNeeded {
CMTime timestamp = [_videoItem currentTime]; CMTime timestamp = [_videoItem currentTime];
if ([_videoOutput hasNewPixelBufferForItemTime:timestamp]) { if ([_videoOutput hasNewPixelBufferForItemTime:timestamp]) {
CVPixelBufferRef pixelBuffer = CVPixelBufferRef pixelBuffer =
[_videoOutput copyPixelBufferForItemTime:timestamp itemTimeForDisplay:nil]; [_videoOutput copyPixelBufferForItemTime:timestamp itemTimeForDisplay:nil];
@ -139,6 +140,12 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*
} }
CFRelease(pixelBuffer); CFRelease(pixelBuffer);
}); });
} else if (!_videoDisplayLink.paused && _videoPlayer.rate == 0) {
// The video might be paused by the operating system for other reasons not caught by the
// context of an interruption. If this happens, the @c _videoDisplayLink will not be in a
// paused state, while the _videoPlayer will have a rate of 0 (i.e. paused). In this
// scenario we restart video playback.
[_videoPlayer play];
} }
} }


@ -124,7 +124,10 @@ class Hands(SolutionBase):
'handlandmarkcpu__ThresholdingCalculator.threshold': 'handlandmarkcpu__ThresholdingCalculator.threshold':
min_tracking_confidence, min_tracking_confidence,
}, },
-outputs=['multi_hand_landmarks', 'multi_handedness'])
+outputs=[
+    'multi_hand_landmarks', 'multi_hand_world_landmarks',
+    'multi_handedness'
+])
def process(self, image: np.ndarray) -> NamedTuple: def process(self, image: np.ndarray) -> NamedTuple:
"""Processes an RGB image and returns the hand landmarks and handedness of each detected hand. """Processes an RGB image and returns the hand landmarks and handedness of each detected hand.
@ -137,10 +140,14 @@ class Hands(SolutionBase):
ValueError: If the input image is not three channel RGB. ValueError: If the input image is not three channel RGB.
Returns: Returns:
-A NamedTuple object with two fields: a "multi_hand_landmarks" field that
-contains the hand landmarks on each detected hand and a "multi_handedness"
-field that contains the handedness (left v.s. right hand) of the detected
-hand.
+A NamedTuple object with the following fields:
+  1) a "multi_hand_landmarks" field that contains the hand landmarks on
+     each detected hand.
+  2) a "multi_hand_world_landmarks" field that contains the hand landmarks
+     on each detected hand in real-world 3D coordinates that are in meters
+     with the origin at the hand's approximate geometric center.
+  3) a "multi_handedness" field that contains the handedness (left v.s.
+     right hand) of the detected hand.
""" """
return super().process(input_data={'image': image}) return super().process(input_data={'image': image})
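A hedged usage sketch for the updated `process()` outputs: the three returned fields are parallel lists with one entry per detected hand, so they can be iterated together. The webcam capture is illustrative only.

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

with mp_hands.Hands(max_num_hands=2) as hands:
    capture = cv2.VideoCapture(0)  # hypothetical camera index
    ok, frame = capture.read()
    if ok:
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.multi_hand_landmarks:
            for handedness, landmarks, world_landmarks in zip(
                    results.multi_handedness,
                    results.multi_hand_landmarks,
                    results.multi_hand_world_landmarks):
                label = handedness.classification[0].label  # "Left" or "Right"
                wrist = world_landmarks.landmark[mp_hands.HandLandmark.WRIST]
                print(label, "wrist at", wrist.x, wrist.y, wrist.z, "meters")
    capture.release()
```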


@ -34,20 +34,20 @@ from mediapipe.python.solutions import hands as mp_hands
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata' TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
LITE_MODEL_DIFF_THRESHOLD = 25 # pixels LITE_MODEL_DIFF_THRESHOLD = 25 # pixels
FULL_MODEL_DIFF_THRESHOLD = 20 # pixels FULL_MODEL_DIFF_THRESHOLD = 20 # pixels
-EXPECTED_HAND_COORDINATES_PREDICTION = [[[138, 343], [211, 330], [257, 286],
+EXPECTED_HAND_COORDINATES_PREDICTION = [[[580, 34], [504, 50], [459, 94],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]],
[[580, 34], [504, 50], [459, 94],
[429, 146], [397, 182], [507, 167], [429, 146], [397, 182], [507, 167],
[479, 245], [469, 292], [464, 330], [479, 245], [469, 292], [464, 330],
[545, 180], [534, 265], [533, 319], [545, 180], [534, 265], [533, 319],
[536, 360], [581, 172], [587, 252], [536, 360], [581, 172], [587, 252],
[593, 304], [599, 346], [615, 168], [593, 304], [599, 346], [615, 168],
-[628, 223], [638, 258], [648, 288]]]
+[628, 223], [638, 258], [648, 288]],
[[138, 343], [211, 330], [257, 286],
[289, 237], [322, 203], [219, 216],
[238, 138], [249, 90], [253, 51],
[177, 204], [184, 115], [187, 60],
[185, 19], [138, 208], [131, 127],
[124, 77], [117, 36], [106, 222],
[92, 159], [79, 124], [68, 93]]]
class HandsTest(parameterized.TestCase): class HandsTest(parameterized.TestCase):


@ -80,6 +80,7 @@ class Holistic(SolutionBase):
smooth_landmarks=True, smooth_landmarks=True,
enable_segmentation=False, enable_segmentation=False,
smooth_segmentation=True, smooth_segmentation=True,
refine_face_landmarks=False,
min_detection_confidence=0.5, min_detection_confidence=0.5,
min_tracking_confidence=0.5): min_tracking_confidence=0.5):
"""Initializes a MediaPipe Holistic object. """Initializes a MediaPipe Holistic object.
@ -98,6 +99,10 @@ class Holistic(SolutionBase):
smooth_segmentation: Whether to filter segmentation across different input smooth_segmentation: Whether to filter segmentation across different input
images to reduce jitter. See details in images to reduce jitter. See details in
https://solutions.mediapipe.dev/holistic#smooth_segmentation. https://solutions.mediapipe.dev/holistic#smooth_segmentation.
refine_face_landmarks: Whether to further refine the landmark coordinates
around the eyes and lips, and output additional landmarks around the
irises. Default to False. See details in
https://solutions.mediapipe.dev/holistic#refine_face_landmarks.
min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
detection to be considered successful. See details in detection to be considered successful. See details in
https://solutions.mediapipe.dev/holistic#min_detection_confidence. https://solutions.mediapipe.dev/holistic#min_detection_confidence.
@ -114,6 +119,7 @@ class Holistic(SolutionBase):
'enable_segmentation': enable_segmentation, 'enable_segmentation': enable_segmentation,
'smooth_segmentation': 'smooth_segmentation':
smooth_segmentation and not static_image_mode, smooth_segmentation and not static_image_mode,
'refine_face_landmarks': refine_face_landmarks,
'use_prev_landmarks': not static_image_mode, 'use_prev_landmarks': not static_image_mode,
}, },
calculator_params={ calculator_params={
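A minimal sketch of the new option in use, assuming the `mp.solutions.holistic` API shown above: with `refine_face_landmarks=True` the face mesh gains the iris landmarks, growing from 468 to 478 points (the test change below checks exactly this). The input image path is hypothetical.

```python
import cv2
import mediapipe as mp

image = cv2.cvtColor(cv2.imread("person.jpg"), cv2.COLOR_BGR2RGB)
with mp.solutions.holistic.Holistic(
        static_image_mode=True, refine_face_landmarks=True) as holistic:
    results = holistic.process(image)
    if results.face_landmarks:
        # 478 landmarks with refinement, 468 without.
        print(len(results.face_landmarks.landmark))
```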


@ -99,18 +99,23 @@ class PoseTest(parameterized.TestCase):
results = holistic.process(image) results = holistic.process(image)
self.assertIsNone(results.pose_landmarks) self.assertIsNone(results.pose_landmarks)
-@parameterized.named_parameters(('static_lite', True, 0, 3),
-                                ('static_full', True, 1, 3),
-                                ('static_heavy', True, 2, 3),
-                                ('video_lite', False, 0, 3),
-                                ('video_full', False, 1, 3),
-                                ('video_heavy', False, 2, 3))
-def test_on_image(self, static_image_mode, model_complexity, num_frames):
+@parameterized.named_parameters(('static_lite', True, 0, False, 3),
+                                ('static_full', True, 1, False, 3),
+                                ('static_heavy', True, 2, False, 3),
+                                ('video_lite', False, 0, False, 3),
+                                ('video_full', False, 1, False, 3),
+                                ('video_heavy', False, 2, False, 3),
+                                ('static_full_refine_face', True, 1, True, 3),
+                                ('video_full_refine_face', False, 1, True, 3))
+def test_on_image(self, static_image_mode, model_complexity,
+                  refine_face_landmarks, num_frames):
image_path = os.path.join(os.path.dirname(__file__), image_path = os.path.join(os.path.dirname(__file__),
'testdata/holistic.jpg') 'testdata/holistic.jpg')
image = cv2.imread(image_path) image = cv2.imread(image_path)
-with mp_holistic.Holistic(static_image_mode=static_image_mode,
-                          model_complexity=model_complexity) as holistic:
+with mp_holistic.Holistic(
+    static_image_mode=static_image_mode,
+    model_complexity=model_complexity,
+    refine_face_landmarks=refine_face_landmarks) as holistic:
for idx in range(num_frames): for idx in range(num_frames):
results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
self._annotate(image.copy(), results, idx) self._annotate(image.copy(), results, idx)
@ -129,7 +134,8 @@ class PoseTest(parameterized.TestCase):
EXPECTED_RIGHT_HAND_LANDMARKS, EXPECTED_RIGHT_HAND_LANDMARKS,
HAND_DIFF_THRESHOLD) HAND_DIFF_THRESHOLD)
# TODO: Verify the correctness of the face landmarks. # TODO: Verify the correctness of the face landmarks.
-self.assertLen(results.face_landmarks.landmark, 468)
+self.assertLen(results.face_landmarks.landmark,
+               478 if refine_face_landmarks else 468)
if __name__ == '__main__': if __name__ == '__main__':