Project import generated by Copybara.
GitOrigin-RevId: d4a11282d20fe4d2e137f9032cf349750030dcb9
parent 1faeaae7e5
commit d4bb35fe5a
@@ -257,8 +257,15 @@ glSurfaceView.setSolutionResultRenderer(new FaceDetectionResultGlRenderer());
glSurfaceView.setRenderInputImage(true);
faceDetection.setResultListener(
    faceDetectionResult -> {
      if (faceDetectionResult.multiFaceDetections().isEmpty()) {
        return;
      }
      RelativeKeypoint noseTip =
          FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
          faceDetectionResult
              .multiFaceDetections()
              .get(0)
              .getLocationData()
              .getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
      Log.i(
          TAG,
          String.format(
@ -297,10 +304,17 @@ FaceDetection faceDetection = new FaceDetection(this, faceDetectionOptions);
|
|||
FaceDetectionResultImageView imageView = new FaceDetectionResultImageView(this);
|
||||
faceDetection.setResultListener(
|
||||
faceDetectionResult -> {
|
||||
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
int width = faceDetectionResult.inputBitmap().getWidth();
|
||||
int height = faceDetectionResult.inputBitmap().getHeight();
|
||||
RelativeKeypoint noseTip =
|
||||
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
|
||||
faceDetectionResult
|
||||
.multiFaceDetections()
|
||||
.get(0)
|
||||
.getLocationData()
|
||||
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -334,9 +348,9 @@ ActivityResultLauncher<Intent> imageGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
@ -368,8 +382,15 @@ glSurfaceView.setRenderInputImage(true);
|
|||
|
||||
faceDetection.setResultListener(
|
||||
faceDetectionResult -> {
|
||||
if (faceDetectionResult.multiFaceDetections().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
RelativeKeypoint noseTip =
|
||||
FaceDetection.getFaceKeypoint(result, 0, FaceKeypoint.NOSE_TIP);
|
||||
faceDetectionResult
|
||||
.multiFaceDetections()
|
||||
.get(0)
|
||||
.getLocationData()
|
||||
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -398,9 +419,9 @@ ActivityResultLauncher<Intent> videoGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@ -612,9 +612,9 @@ ActivityResultLauncher<Intent> imageGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
@ -678,9 +678,9 @@ ActivityResultLauncher<Intent> videoGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@@ -91,8 +91,10 @@ To detect initial hand locations, we designed a
mobile real-time uses in a manner similar to the face detection model in
[MediaPipe Face Mesh](./face_mesh.md). Detecting hands is a decidedly complex
task: our
[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite)
has to work across a variety of hand sizes with a large scale span (~20x)
[lite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite)
and
[full model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite)
have to work across a variety of hand sizes with a large scale span (~20x)
relative to the image frame and be able to detect occluded and self-occluded
hands. Whereas faces have high contrast patterns, e.g., in the eye and mouth
region, the lack of such features in hands makes it comparatively difficult to
|
@@ -195,6 +197,17 @@ of 21 hand landmarks and each landmark is composed of `x`, `y` and `z`. `x` and
and the smaller the value the closer the landmark is to the camera. The
magnitude of `z` uses roughly the same scale as `x`.

#### multi_hand_world_landmarks

Collection of detected/tracked hands, where each hand is represented as a list
of 21 hand landmarks in world coordinates. Each landmark consists of the
following:

*   `x`, `y` and `z`: Real-world 3D coordinates in meters with the origin at the
    hand's approximate geometric center.
*   `visibility`: Identical to that defined in the corresponding
    [multi_hand_landmarks](#multi_hand_landmarks).
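
As a quick illustration (not part of this change), the world landmarks can be consumed from the Python solution just like the normalized ones; because they are expressed in meters, distances between them approximate physical distances. The snippet below is a minimal sketch and assumes a placeholder input image named `hand.jpg`:

```python
# Sketch: reading hand world landmarks with the MediaPipe Hands Python solution.
# 'hand.jpg' is a placeholder path, not part of the patch.
import math

import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
  image = cv2.imread('hand.jpg')  # placeholder input image
  results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.multi_hand_world_landmarks:
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      wrist = hand_world_landmarks.landmark[mp_hands.HandLandmark.WRIST]
      index_tip = hand_world_landmarks.landmark[
          mp_hands.HandLandmark.INDEX_FINGER_TIP]
      # World landmarks are in meters, so this is an approximate physical distance.
      dist_m = math.dist((wrist.x, wrist.y, wrist.z),
                         (index_tip.x, index_tip.y, index_tip.z))
      print(f'Wrist to index fingertip: {dist_m:.3f} m')
```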

#### multi_handedness

Collection of handedness of the detected/tracked hands (i.e. is it a left or
|
@@ -262,6 +275,12 @@ with mp_hands.Hands(
          mp_drawing_styles.get_default_hand_connections_style())
    cv2.imwrite(
        '/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
    # Draw hand world landmarks.
    if not results.multi_hand_world_landmarks:
      continue
    for hand_world_landmarks in results.multi_hand_world_landmarks:
      mp_drawing.plot_landmarks(
          hand_world_landmarks, mp_hands.HAND_CONNECTIONS, azimuth=5)

# For webcam input:
cap = cv2.VideoCapture(0)
|
@ -400,7 +419,7 @@ Supported configuration options:
|
|||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(false)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
hands.setErrorListener(
|
||||
|
@ -423,8 +442,11 @@ glSurfaceView.setRenderInputImage(true);
|
|||
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark wristLandmark =
|
||||
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -453,7 +475,7 @@ glSurfaceView.post(
|
|||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(true)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
|
||||
|
@ -464,10 +486,13 @@ Hands hands = new Hands(this, handsOptions);
|
|||
HandsResultImageView imageView = new HandsResultImageView(this);
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
int width = handsResult.inputBitmap().getWidth();
|
||||
int height = handsResult.inputBitmap().getHeight();
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
NormalizedLandmark wristLandmark =
|
||||
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -501,9 +526,9 @@ ActivityResultLauncher<Intent> imageGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery = new Intent(
|
||||
Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
```
|
||||
|
||||
#### Video Input
|
||||
|
@ -513,7 +538,7 @@ imageGetter.launch(gallery);
|
|||
HandsOptions handsOptions =
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(false)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(true).build();
|
||||
Hands hands = new Hands(this, handsOptions);
|
||||
hands.setErrorListener(
|
||||
|
@ -536,8 +561,11 @@ glSurfaceView.setRenderInputImage(true);
|
|||
|
||||
hands.setResultListener(
|
||||
handsResult -> {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(
|
||||
handsResult, 0, HandLandmark.WRIST);
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark wristLandmark =
|
||||
handsResult.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
|
@ -566,9 +594,9 @@ ActivityResultLauncher<Intent> videoGetter =
|
|||
}
|
||||
}
|
||||
});
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
```
|
||||
|
||||
## Example Apps
|
||||
|
|
|
@@ -159,6 +159,11 @@ images to reduce jitter. Ignored if [enable_segmentation](#enable_segmentation)
is `false` or [static_image_mode](#static_image_mode) is `true`. Default to
`true`.

#### refine_face_landmarks

Whether to further refine the landmark coordinates around the eyes and lips, and
output additional landmarks around the irises. Default to `false`.
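
As a hedged illustration (not part of this change), the sketch below compares the face landmark count with the option off and on. The expectation that iris refinement adds 10 landmarks (468 vs. 478) is an assumption based on the iris model, not something stated in this patch, and `person.jpg` is a placeholder image path:

```python
# Sketch: observing the effect of refine_face_landmarks on the face landmark count.
import cv2
import mediapipe as mp

mp_holistic = mp.solutions.holistic
image = cv2.cvtColor(cv2.imread('person.jpg'), cv2.COLOR_BGR2RGB)  # placeholder

for refine in (False, True):
  with mp_holistic.Holistic(static_image_mode=True,
                            refine_face_landmarks=refine) as holistic:
    results = holistic.process(image)
    if results.face_landmarks:
      print(refine, len(results.face_landmarks.landmark))
# Expected output (assumption): "False 468" then "True 478".
```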

#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the person-detection model for the
@@ -241,6 +246,7 @@ Supported configuration options:
*   [smooth_landmarks](#smooth_landmarks)
*   [enable_segmentation](#enable_segmentation)
*   [smooth_segmentation](#smooth_segmentation)
*   [refine_face_landmarks](#refine_face_landmarks)
*   [min_detection_confidence](#min_detection_confidence)
*   [min_tracking_confidence](#min_tracking_confidence)

@@ -256,7 +262,8 @@ IMAGE_FILES = []
with mp_holistic.Holistic(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True) as holistic:
    enable_segmentation=True,
    refine_face_landmarks=True) as holistic:
  for idx, file in enumerate(IMAGE_FILES):
    image = cv2.imread(file)
    image_height, image_width, _ = image.shape

@@ -350,6 +357,7 @@ Supported configuration options:
*   [smoothLandmarks](#smooth_landmarks)
*   [enableSegmentation](#enable_segmentation)
*   [smoothSegmentation](#smooth_segmentation)
*   [refineFaceLandmarks](#refineFaceLandmarks)
*   [minDetectionConfidence](#min_detection_confidence)
*   [minTrackingConfidence](#min_tracking_confidence)

@@ -421,6 +429,7 @@ holistic.setOptions({
  smoothLandmarks: true,
  enableSegmentation: true,
  smoothSegmentation: true,
  refineFaceLandmarks: true,
  minDetectionConfidence: 0.5,
  minTrackingConfidence: 0.5
});

@@ -55,15 +55,14 @@ one over the other.
### [Hands](https://google.github.io/mediapipe/solutions/hands)

*   Palm detection model:
    [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
    [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_lite.tflite),
    [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_full.tflite),
    [TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
*   Hand landmark model:
    [TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
    [TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
    [TFLite model (sparse)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_sparse.tflite),
    [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
*   [Model card](https://mediapipe.page.link/handmc),
    [Model card (sparse)](https://mediapipe.page.link/handmc-sparse)
*   [Model card](https://mediapipe.page.link/handmc)

### [Pose](https://google.github.io/mediapipe/solutions/pose)

@@ -60,7 +60,10 @@ class PacketClonerCalculator : public CalculatorBase {
    const auto calculator_options =
        cc->Options<mediapipe::PacketClonerCalculatorOptions>();
    output_only_when_all_inputs_received_ =
        calculator_options.output_only_when_all_inputs_received();
        calculator_options.output_only_when_all_inputs_received() ||
        calculator_options.output_packets_only_when_all_inputs_received();
    output_empty_packets_before_all_inputs_received_ =
        calculator_options.output_packets_only_when_all_inputs_received();

    // Parse input streams.
    tick_signal_index_ = cc->Inputs().NumEntries() - 1;

@@ -88,6 +91,9 @@ class PacketClonerCalculator : public CalculatorBase {
    // Return if one of the input is null.
    for (int i = 0; i < tick_signal_index_; ++i) {
      if (current_[i].IsEmpty()) {
        if (output_empty_packets_before_all_inputs_received_) {
          SetAllNextTimestampBounds(cc);
        }
        return absl::OkStatus();
      }
    }

@@ -107,9 +113,17 @@ class PacketClonerCalculator : public CalculatorBase {
  }

 private:
  void SetAllNextTimestampBounds(CalculatorContext* cc) {
    for (int j = 0; j < tick_signal_index_; ++j) {
      cc->Outputs().Index(j).SetNextTimestampBound(
          cc->InputTimestamp().NextAllowedInStream());
    }
  }

  std::vector<Packet> current_;
  int tick_signal_index_;
  bool output_only_when_all_inputs_received_;
  bool output_empty_packets_before_all_inputs_received_;
};

REGISTER_CALCULATOR(PacketClonerCalculator);

@@ -28,4 +28,9 @@ message PacketClonerCalculatorOptions {
  // When true, this calculator will drop received TICK packets if any input
  // stream hasn't received a packet yet.
  optional bool output_only_when_all_inputs_received = 1 [default = false];

  // Similar to the above, but also transmits empty packets for all streams
  // before all inputs are received.
  optional bool output_packets_only_when_all_inputs_received = 2
      [default = false];
}

@ -32,9 +32,9 @@ public class FaceDetectionResultImageView extends AppCompatImageView {
|
|||
private static final String TAG = "FaceDetectionResultImageView";
|
||||
|
||||
private static final int KEYPOINT_COLOR = Color.RED;
|
||||
private static final int KEYPOINT_RADIUS = 15;
|
||||
private static final int KEYPOINT_RADIUS = 8; // Pixels
|
||||
private static final int BBOX_COLOR = Color.GREEN;
|
||||
private static final int BBOX_THICKNESS = 10;
|
||||
private static final int BBOX_THICKNESS = 5; // Pixels
|
||||
private Bitmap latest;
|
||||
|
||||
public FaceDetectionResultImageView(Context context) {
|
||||
|
|
|
@ -28,7 +28,6 @@ import androidx.activity.result.ActivityResultLauncher;
|
|||
import androidx.activity.result.contract.ActivityResultContracts;
|
||||
import androidx.exifinterface.media.ExifInterface;
|
||||
// ContentResolver dependency
|
||||
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
|
||||
import com.google.mediapipe.solutioncore.CameraInput;
|
||||
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
|
||||
import com.google.mediapipe.solutioncore.VideoInput;
|
||||
|
@ -36,6 +35,7 @@ import com.google.mediapipe.solutions.facedetection.FaceDetection;
|
|||
import com.google.mediapipe.solutions.facedetection.FaceDetectionOptions;
|
||||
import com.google.mediapipe.solutions.facedetection.FaceDetectionResult;
|
||||
import com.google.mediapipe.solutions.facedetection.FaceKeypoint;
|
||||
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
|
@ -175,9 +175,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
});
|
||||
imageView = new FaceDetectionResultImageView(this);
|
||||
}
|
||||
|
@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -334,8 +334,15 @@ public class MainActivity extends AppCompatActivity {
|
|||
|
||||
private void logNoseTipKeypoint(
|
||||
FaceDetectionResult result, int faceIndex, boolean showPixelValues) {
|
||||
if (result.multiFaceDetections().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
RelativeKeypoint noseTip =
|
||||
FaceDetection.getFaceKeypoint(result, faceIndex, FaceKeypoint.NOSE_TIP);
|
||||
result
|
||||
.multiFaceDetections()
|
||||
.get(faceIndex)
|
||||
.getLocationData()
|
||||
.getRelativeKeypoints(FaceKeypoint.NOSE_TIP);
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
|
||||
if (showPixelValues) {
|
||||
int width = result.inputBitmap().getWidth();
|
||||
|
|
|
@ -34,19 +34,19 @@ public class FaceMeshResultImageView extends AppCompatImageView {
|
|||
private static final String TAG = "FaceMeshResultImageView";
|
||||
|
||||
private static final int TESSELATION_COLOR = Color.parseColor("#70C0C0C0");
|
||||
private static final int TESSELATION_THICKNESS = 5;
|
||||
private static final int TESSELATION_THICKNESS = 3; // Pixels
|
||||
private static final int RIGHT_EYE_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_EYE_THICKNESS = 8;
|
||||
private static final int RIGHT_EYE_THICKNESS = 5; // Pixels
|
||||
private static final int RIGHT_EYEBROW_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_EYEBROW_THICKNESS = 8;
|
||||
private static final int RIGHT_EYEBROW_THICKNESS = 5; // Pixels
|
||||
private static final int LEFT_EYE_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LEFT_EYE_THICKNESS = 8;
|
||||
private static final int LEFT_EYE_THICKNESS = 5; // Pixels
|
||||
private static final int LEFT_EYEBROW_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LEFT_EYEBROW_THICKNESS = 8;
|
||||
private static final int LEFT_EYEBROW_THICKNESS = 5; // Pixels
|
||||
private static final int FACE_OVAL_COLOR = Color.parseColor("#E0E0E0");
|
||||
private static final int FACE_OVAL_THICKNESS = 8;
|
||||
private static final int FACE_OVAL_THICKNESS = 5; // Pixels
|
||||
private static final int LIPS_COLOR = Color.parseColor("#E0E0E0");
|
||||
private static final int LIPS_THICKNESS = 8;
|
||||
private static final int LIPS_THICKNESS = 5; // Pixels
|
||||
private Bitmap latest;
|
||||
|
||||
public FaceMeshResultImageView(Context context) {
|
||||
|
|
|
@ -176,9 +176,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
});
|
||||
imageView = new FaceMeshResultImageView(this);
|
||||
}
|
||||
|
@ -240,9 +240,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,16 @@ import java.util.List;
|
|||
public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
||||
private static final String TAG = "HandsResultGlRenderer";
|
||||
|
||||
private static final float CONNECTION_THICKNESS = 20.0f;
|
||||
private static final float[] LEFT_HAND_CONNECTION_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final float[] RIGHT_HAND_CONNECTION_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final float CONNECTION_THICKNESS = 25.0f;
|
||||
private static final float[] LEFT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final float[] RIGHT_HAND_HOLLOW_CIRCLE_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final float HOLLOW_CIRCLE_RADIUS = 0.01f;
|
||||
private static final float[] LEFT_HAND_LANDMARK_COLOR = new float[] {1f, 0.2f, 0.2f, 1f};
|
||||
private static final float[] RIGHT_HAND_LANDMARK_COLOR = new float[] {0.2f, 1f, 0.2f, 1f};
|
||||
private static final float LANDMARK_RADIUS = 0.008f;
|
||||
private static final int NUM_SEGMENTS = 120;
|
||||
private static final String VERTEX_SHADER =
|
||||
"uniform mat4 uProjectionMatrix;\n"
|
||||
+ "attribute vec4 vPosition;\n"
|
||||
|
@ -37,12 +46,14 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
+ "}";
|
||||
private static final String FRAGMENT_SHADER =
|
||||
"precision mediump float;\n"
|
||||
+ "uniform vec4 uColor;\n"
|
||||
+ "void main() {\n"
|
||||
+ " gl_FragColor = vec4(0, 1, 0, 1);\n"
|
||||
+ " gl_FragColor = uColor;\n"
|
||||
+ "}";
|
||||
private int program;
|
||||
private int positionHandle;
|
||||
private int projectionMatrixHandle;
|
||||
private int colorHandle;
|
||||
|
||||
private int loadShader(int type, String shaderCode) {
|
||||
int shader = GLES20.glCreateShader(type);
|
||||
|
@ -61,6 +72,7 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
GLES20.glLinkProgram(program);
|
||||
positionHandle = GLES20.glGetAttribLocation(program, "vPosition");
|
||||
projectionMatrixHandle = GLES20.glGetUniformLocation(program, "uProjectionMatrix");
|
||||
colorHandle = GLES20.glGetUniformLocation(program, "uColor");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -74,7 +86,22 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
|
||||
int numHands = result.multiHandLandmarks().size();
|
||||
for (int i = 0; i < numHands; ++i) {
|
||||
drawLandmarks(result.multiHandLandmarks().get(i).getLandmarkList());
|
||||
boolean isLeftHand = result.multiHandedness().get(i).getLabel().equals("Left");
|
||||
drawConnections(
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(),
|
||||
isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
|
||||
for (NormalizedLandmark landmark : result.multiHandLandmarks().get(i).getLandmarkList()) {
|
||||
// Draws the landmark.
|
||||
drawCircle(
|
||||
landmark.getX(),
|
||||
landmark.getY(),
|
||||
isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
|
||||
// Draws a hollow circle around the landmark.
|
||||
drawHollowCircle(
|
||||
landmark.getX(),
|
||||
landmark.getY(),
|
||||
isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,7 +114,8 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
GLES20.glDeleteProgram(program);
|
||||
}
|
||||
|
||||
private void drawLandmarks(List<NormalizedLandmark> handLandmarkList) {
|
||||
private void drawConnections(List<NormalizedLandmark> handLandmarkList, float[] colorArray) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
|
||||
NormalizedLandmark start = handLandmarkList.get(c.start());
|
||||
NormalizedLandmark end = handLandmarkList.get(c.end());
|
||||
|
@ -103,4 +131,51 @@ public class HandsResultGlRenderer implements ResultGlRenderer<HandsResult> {
|
|||
GLES20.glDrawArrays(GLES20.GL_LINES, 0, 2);
|
||||
}
|
||||
}
|
||||
|
||||
private void drawCircle(float x, float y, float[] colorArray) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
int vertexCount = NUM_SEGMENTS + 2;
|
||||
float[] vertices = new float[vertexCount * 3];
|
||||
vertices[0] = x;
|
||||
vertices[1] = y;
|
||||
vertices[2] = 0;
|
||||
for (int i = 1; i < vertexCount; i++) {
|
||||
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
|
||||
int currentIndex = 3 * i;
|
||||
vertices[currentIndex] = x + (float) (LANDMARK_RADIUS * Math.cos(angle));
|
||||
vertices[currentIndex + 1] = y + (float) (LANDMARK_RADIUS * Math.sin(angle));
|
||||
vertices[currentIndex + 2] = 0;
|
||||
}
|
||||
FloatBuffer vertexBuffer =
|
||||
ByteBuffer.allocateDirect(vertices.length * 4)
|
||||
.order(ByteOrder.nativeOrder())
|
||||
.asFloatBuffer()
|
||||
.put(vertices);
|
||||
vertexBuffer.position(0);
|
||||
GLES20.glEnableVertexAttribArray(positionHandle);
|
||||
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
|
||||
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_FAN, 0, vertexCount);
|
||||
}
|
||||
|
||||
private void drawHollowCircle(float x, float y, float[] colorArray) {
|
||||
GLES20.glUniform4fv(colorHandle, 1, colorArray, 0);
|
||||
int vertexCount = NUM_SEGMENTS + 1;
|
||||
float[] vertices = new float[vertexCount * 3];
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
float angle = 2.0f * i * (float) Math.PI / NUM_SEGMENTS;
|
||||
int currentIndex = 3 * i;
|
||||
vertices[currentIndex] = x + (float) (HOLLOW_CIRCLE_RADIUS * Math.cos(angle));
|
||||
vertices[currentIndex + 1] = y + (float) (HOLLOW_CIRCLE_RADIUS * Math.sin(angle));
|
||||
vertices[currentIndex + 2] = 0;
|
||||
}
|
||||
FloatBuffer vertexBuffer =
|
||||
ByteBuffer.allocateDirect(vertices.length * 4)
|
||||
.order(ByteOrder.nativeOrder())
|
||||
.asFloatBuffer()
|
||||
.put(vertices);
|
||||
vertexBuffer.position(0);
|
||||
GLES20.glEnableVertexAttribArray(positionHandle);
|
||||
GLES20.glVertexAttribPointer(positionHandle, 3, GLES20.GL_FLOAT, false, 0, vertexBuffer);
|
||||
GLES20.glDrawArrays(GLES20.GL_LINE_STRIP, 0, vertexCount);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,10 +31,15 @@ import java.util.List;
|
|||
public class HandsResultImageView extends AppCompatImageView {
|
||||
private static final String TAG = "HandsResultImageView";
|
||||
|
||||
private static final int LANDMARK_COLOR = Color.RED;
|
||||
private static final int LANDMARK_RADIUS = 15;
|
||||
private static final int CONNECTION_COLOR = Color.GREEN;
|
||||
private static final int CONNECTION_THICKNESS = 10;
|
||||
private static final int LEFT_HAND_CONNECTION_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int RIGHT_HAND_CONNECTION_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int CONNECTION_THICKNESS = 8; // Pixels
|
||||
private static final int LEFT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int RIGHT_HAND_HOLLOW_CIRCLE_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int HOLLOW_CIRCLE_WIDTH = 5; // Pixels
|
||||
private static final int LEFT_HAND_LANDMARK_COLOR = Color.parseColor("#FF3030");
|
||||
private static final int RIGHT_HAND_LANDMARK_COLOR = Color.parseColor("#30FF30");
|
||||
private static final int LANDMARK_RADIUS = 10; // Pixels
|
||||
private Bitmap latest;
|
||||
|
||||
public HandsResultImageView(Context context) {
|
||||
|
@ -62,7 +67,11 @@ public class HandsResultImageView extends AppCompatImageView {
|
|||
int numHands = result.multiHandLandmarks().size();
|
||||
for (int i = 0; i < numHands; ++i) {
|
||||
drawLandmarksOnCanvas(
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(), canvas, width, height);
|
||||
result.multiHandLandmarks().get(i).getLandmarkList(),
|
||||
result.multiHandedness().get(i).getLabel().equals("Left"),
|
||||
canvas,
|
||||
width,
|
||||
height);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,11 +84,16 @@ public class HandsResultImageView extends AppCompatImageView {
|
|||
}
|
||||
|
||||
private void drawLandmarksOnCanvas(
|
||||
List<NormalizedLandmark> handLandmarkList, Canvas canvas, int width, int height) {
|
||||
List<NormalizedLandmark> handLandmarkList,
|
||||
boolean isLeftHand,
|
||||
Canvas canvas,
|
||||
int width,
|
||||
int height) {
|
||||
// Draw connections.
|
||||
for (Hands.Connection c : Hands.HAND_CONNECTIONS) {
|
||||
Paint connectionPaint = new Paint();
|
||||
connectionPaint.setColor(CONNECTION_COLOR);
|
||||
connectionPaint.setColor(
|
||||
isLeftHand ? LEFT_HAND_CONNECTION_COLOR : RIGHT_HAND_CONNECTION_COLOR);
|
||||
connectionPaint.setStrokeWidth(CONNECTION_THICKNESS);
|
||||
NormalizedLandmark start = handLandmarkList.get(c.start());
|
||||
NormalizedLandmark end = handLandmarkList.get(c.end());
|
||||
|
@ -91,11 +105,23 @@ public class HandsResultImageView extends AppCompatImageView {
|
|||
connectionPaint);
|
||||
}
|
||||
Paint landmarkPaint = new Paint();
|
||||
landmarkPaint.setColor(LANDMARK_COLOR);
|
||||
// Draw landmarks.
|
||||
landmarkPaint.setColor(isLeftHand ? LEFT_HAND_LANDMARK_COLOR : RIGHT_HAND_LANDMARK_COLOR);
|
||||
// Draws landmarks.
|
||||
for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
|
||||
canvas.drawCircle(
|
||||
landmark.getX() * width, landmark.getY() * height, LANDMARK_RADIUS, landmarkPaint);
|
||||
}
|
||||
// Draws hollow circles around landmarks.
|
||||
landmarkPaint.setColor(
|
||||
isLeftHand ? LEFT_HAND_HOLLOW_CIRCLE_COLOR : RIGHT_HAND_HOLLOW_CIRCLE_COLOR);
|
||||
landmarkPaint.setStrokeWidth(HOLLOW_CIRCLE_WIDTH);
|
||||
landmarkPaint.setStyle(Paint.Style.STROKE);
|
||||
for (LandmarkProto.NormalizedLandmark landmark : handLandmarkList) {
|
||||
canvas.drawCircle(
|
||||
landmark.getX() * width,
|
||||
landmark.getY() * height,
|
||||
LANDMARK_RADIUS + HOLLOW_CIRCLE_WIDTH,
|
||||
landmarkPaint);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import androidx.activity.result.ActivityResultLauncher;
|
|||
import androidx.activity.result.contract.ActivityResultContracts;
|
||||
import androidx.exifinterface.media.ExifInterface;
|
||||
// ContentResolver dependency
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.Landmark;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.solutioncore.CameraInput;
|
||||
import com.google.mediapipe.solutioncore.SolutionGlSurfaceView;
|
||||
|
@ -177,9 +178,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
setupStaticImageModePipeline();
|
||||
}
|
||||
// Reads images from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Images.Media.INTERNAL_CONTENT_URI);
|
||||
imageGetter.launch(gallery);
|
||||
Intent pickImageIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickImageIntent.setDataAndType(MediaStore.Images.Media.INTERNAL_CONTENT_URI, "image/*");
|
||||
imageGetter.launch(pickImageIntent);
|
||||
});
|
||||
imageView = new HandsResultImageView(this);
|
||||
}
|
||||
|
@ -193,7 +194,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
this,
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(true)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
|
||||
|
@ -241,9 +242,9 @@ public class MainActivity extends AppCompatActivity {
|
|||
stopCurrentPipeline();
|
||||
setupStreamingModePipeline(InputSource.VIDEO);
|
||||
// Reads video from gallery.
|
||||
Intent gallery =
|
||||
new Intent(Intent.ACTION_PICK, MediaStore.Video.Media.INTERNAL_CONTENT_URI);
|
||||
videoGetter.launch(gallery);
|
||||
Intent pickVideoIntent = new Intent(Intent.ACTION_PICK);
|
||||
pickVideoIntent.setDataAndType(MediaStore.Video.Media.INTERNAL_CONTENT_URI, "video/*");
|
||||
videoGetter.launch(pickVideoIntent);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -269,7 +270,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
this,
|
||||
HandsOptions.builder()
|
||||
.setStaticImageMode(false)
|
||||
.setMaxNumHands(1)
|
||||
.setMaxNumHands(2)
|
||||
.setRunOnGpu(RUN_ON_GPU)
|
||||
.build());
|
||||
hands.setErrorListener((message, e) -> Log.e(TAG, "MediaPipe Hands error:" + message));
|
||||
|
@ -336,7 +337,11 @@ public class MainActivity extends AppCompatActivity {
|
|||
}
|
||||
|
||||
private void logWristLandmark(HandsResult result, boolean showPixelValues) {
|
||||
NormalizedLandmark wristLandmark = Hands.getHandLandmark(result, 0, HandLandmark.WRIST);
|
||||
if (result.multiHandLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
NormalizedLandmark wristLandmark =
|
||||
result.multiHandLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
// For Bitmaps, show the pixel values. For texture inputs, show the normalized coordinates.
|
||||
if (showPixelValues) {
|
||||
int width = result.inputBitmap().getWidth();
|
||||
|
@ -353,5 +358,16 @@ public class MainActivity extends AppCompatActivity {
|
|||
"MediaPipe Hand wrist normalized coordinates (value range: [0, 1]): x=%f, y=%f",
|
||||
wristLandmark.getX(), wristLandmark.getY()));
|
||||
}
|
||||
if (result.multiHandWorldLandmarks().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
Landmark wristWorldLandmark =
|
||||
result.multiHandWorldLandmarks().get(0).getLandmarkList().get(HandLandmark.WRIST);
|
||||
Log.i(
|
||||
TAG,
|
||||
String.format(
|
||||
"MediaPipe Hand wrist world coordinates (in meters with the origin at the hand's"
|
||||
+ " approximate geometric center): x=%f m, y=%f m, z=%f m",
|
||||
wristWorldLandmark.getX(), wristWorldLandmark.getY(), wristWorldLandmark.getZ()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ android_binary(
|
|||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
|
|
@ -37,9 +37,11 @@ android_binary(
|
|||
srcs = glob(["*.java"]),
|
||||
assets = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml",
|
||||
|
@ -53,6 +55,7 @@ android_binary(
|
|||
"outputVideoStreamName": "output_video",
|
||||
"flipFramesVertically": "True",
|
||||
"converterNumBuffers": "2",
|
||||
# "modelComplexity": "0" # 0=lite, 1=heavy, not specified=heavy
|
||||
},
|
||||
multidex = "native",
|
||||
deps = [
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
|
||||
package com.google.mediapipe.apps.handtrackinggpu;
|
||||
|
||||
import android.content.pm.ApplicationInfo;
|
||||
import android.content.pm.PackageManager;
|
||||
import android.content.pm.PackageManager.NameNotFoundException;
|
||||
import android.os.Bundle;
|
||||
import android.util.Log;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
|
@ -30,6 +33,7 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
|
|||
private static final String TAG = "MainActivity";
|
||||
|
||||
private static final String INPUT_NUM_HANDS_SIDE_PACKET_NAME = "num_hands";
|
||||
private static final String INPUT_MODEL_COMPLEXITY = "model_complexity";
|
||||
private static final String OUTPUT_LANDMARKS_STREAM_NAME = "hand_landmarks";
|
||||
// Max number of hands to detect/process.
|
||||
private static final int NUM_HANDS = 2;
|
||||
|
@ -38,9 +42,22 @@ public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity {
|
|||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
|
||||
ApplicationInfo applicationInfo;
|
||||
try {
|
||||
applicationInfo =
|
||||
getPackageManager().getApplicationInfo(getPackageName(), PackageManager.GET_META_DATA);
|
||||
} catch (NameNotFoundException e) {
|
||||
throw new AssertionError(e);
|
||||
}
|
||||
|
||||
AndroidPacketCreator packetCreator = processor.getPacketCreator();
|
||||
Map<String, Packet> inputSidePackets = new HashMap<>();
|
||||
inputSidePackets.put(INPUT_NUM_HANDS_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_HANDS));
|
||||
if (applicationInfo.metaData.containsKey("modelComplexity")) {
|
||||
inputSidePackets.put(
|
||||
INPUT_MODEL_COMPLEXITY,
|
||||
packetCreator.createInt32(applicationInfo.metaData.getInt("modelComplexity")));
|
||||
}
|
||||
processor.setInputSidePackets(inputSidePackets);
|
||||
|
||||
// To show verbose logging, run:
|
||||
|
|
|
@ -282,8 +282,12 @@ absl::Status KinematicPathSolver::UpdatePixelsPerDegree(
|
|||
|
||||
absl::Status KinematicPathSolver::UpdateMinMaxLocation(const int min_location,
|
||||
const int max_location) {
|
||||
RET_CHECK(initialized_)
|
||||
<< "UpdateMinMaxLocation called before first observation added.";
|
||||
if (!initialized_) {
|
||||
max_location_ = max_location;
|
||||
min_location_ = min_location;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
double prior_distance = max_location_ - min_location_;
|
||||
double updated_distance = max_location - min_location;
|
||||
double scale_change = updated_distance / prior_distance;
|
||||
|
|
|
@ -435,6 +435,23 @@ TEST(KinematicPathSolverTest, PassBorderTest) {
|
|||
EXPECT_FLOAT_EQ(state, 404.56668);
|
||||
}
|
||||
|
||||
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfUninitialized) {
|
||||
KinematicOptions options;
|
||||
options.set_min_motion_to_reframe(2.0);
|
||||
options.set_max_velocity(1000);
|
||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
|
||||
}
|
||||
|
||||
TEST(KinematicPathSolverTest, PassUpdateUpdateMinMaxLocationIfInitialized) {
|
||||
KinematicOptions options;
|
||||
options.set_min_motion_to_reframe(2.0);
|
||||
options.set_max_velocity(1000);
|
||||
KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
|
||||
MP_ASSERT_OK(solver.AddObservation(500, kMicroSecInSec * 0));
|
||||
MP_EXPECT_OK(solver.UpdateMinMaxLocation(0, 500));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace autoflip
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -55,7 +55,7 @@ objc_library(
|
|||
name = "HandDetectionGpuAppLibrary",
|
||||
data = [
|
||||
"//mediapipe/graphs/hand_tracking:hand_detection_mobile_gpu_binary_graph",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",
|
||||
|
|
|
@ -64,7 +64,7 @@ objc_library(
|
|||
"//mediapipe/graphs/hand_tracking:hand_tracking_mobile_gpu.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
],
|
||||
deps = [
|
||||
"//mediapipe/examples/ios/common:CommonMediaPipeAppLibrary",
|
||||
|
|
mediapipe/framework/formats/body_rig.proto (new file, 40 lines)
@@ -0,0 +1,40 @@
// Copyright 2021 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

// Joint of a 3D human model (e.g. elbow, knee, wrist). Contains 3D rotation of
// the joint and its visibility.
message Joint {
  // Joint rotation in the 6D continuous representation.
  // Such a representation is more suitable for NN model training and can be
  // converted to quaternions and Euler angles if needed. Details can be found
  // in https://arxiv.org/abs/1812.07035.
  repeated float rotation_6d = 1;

  // Joint visibility.
  // Float score of whether joint is visible: present on the screen and not
  // occluded by other objects. Depending on the model, visibility value is
  // either a sigmoid or an argument of sigmoid, but in any case higher value
  // indicates higher probability of joint being visible. Should stay unset if
  // not supported.
  optional float visibility = 2;
}

// Group of Joint protos.
message JointList {
  repeated Joint joint = 1;
}
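
For reference (not part of this file), the 6D values can be turned into a standard rotation matrix with the Gram-Schmidt construction described in the paper linked above. The Python sketch below assumes `rotation_6d` packs the two 3-vectors back to back; the exact packing convention is an assumption, not something the proto specifies:

```python
# Sketch: converting a Joint's 6D rotation into a 3x3 rotation matrix,
# following the continuous representation of https://arxiv.org/abs/1812.07035.
import numpy as np

def rotation_6d_to_matrix(rotation_6d):
  """rotation_6d: sequence of 6 floats, e.g. the Joint.rotation_6d field."""
  a1 = np.asarray(rotation_6d[0:3], dtype=np.float64)
  a2 = np.asarray(rotation_6d[3:6], dtype=np.float64)
  b1 = a1 / np.linalg.norm(a1)           # first column: normalized a1
  a2 = a2 - np.dot(b1, a2) * b1          # remove the b1 component from a2
  b2 = a2 / np.linalg.norm(a2)           # second column
  b3 = np.cross(b1, b2)                  # third column completes the basis
  return np.stack([b1, b2, b3], axis=-1)

# Identity rotation corresponds to (1, 0, 0, 0, 1, 0):
print(rotation_6d_to_matrix([1, 0, 0, 0, 1, 0]))
```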
|
@ -109,8 +109,7 @@ class Image {
|
|||
return gpu_buffer_.GetCVPixelBufferRef();
|
||||
}
|
||||
#else
|
||||
const mediapipe::GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr()
|
||||
const {
|
||||
mediapipe::GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
|
||||
if (use_gpu_ == false) ConvertToGpu();
|
||||
return gpu_buffer_.GetGlTextureBufferSharedPtr();
|
||||
}
|
||||
|
|
|
@ -23,8 +23,7 @@
|
|||
// of projects that may want to build MediaPipe using alternative build systems,
|
||||
// we also try to set platform-specific defines in this header if missing.
|
||||
#if !defined(MEDIAPIPE_MOBILE) && \
|
||||
(defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) || \
|
||||
defined(__EMSCRIPTEN__))
|
||||
(defined(__ANDROID__) || defined(__EMSCRIPTEN__))
|
||||
#define MEDIAPIPE_MOBILE
|
||||
#endif
|
||||
|
||||
|
@ -36,6 +35,11 @@
|
|||
#include "TargetConditionals.h" // for TARGET_OS_*
|
||||
#if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX
|
||||
#define MEDIAPIPE_IOS
|
||||
|
||||
#if !defined(MEDIAPIPE_MOBILE) && !TARGET_OS_OSX
|
||||
#define MEDIAPIPE_MOBILE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#if !defined(MEDIAPIPE_OSX) && TARGET_OS_OSX
|
||||
#define MEDIAPIPE_OSX
|
||||
|
|
|
@ -65,9 +65,9 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
|
|||
|
||||
OptionsSyntaxUtil syntax_util;
|
||||
for (auto& node : *config->mutable_node()) {
|
||||
for (const std::string& option_def : node.option_value()) {
|
||||
FieldData node_data = options_field_util::AsFieldData(node);
|
||||
|
||||
for (const std::string& option_def : node.option_value()) {
|
||||
std::vector<absl::string_view> tag_and_name =
|
||||
syntax_util.StrSplitTags(option_def);
|
||||
std::string graph_tag = syntax_util.OptionFieldsTag(tag_and_name[1]);
|
||||
|
@ -96,6 +96,7 @@ absl::Status CopyLiteralOptions(CalculatorGraphConfig::Node parent_node,
|
|||
status.Update(MergeField(node_path, packet_data, &node_options));
|
||||
options_field_util::SetOptionsMessage(node_options, &node);
|
||||
}
|
||||
node.clear_option_value();
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
|
|
@ -137,7 +137,6 @@ TEST_F(OptionsUtilTest, CopyLiteralOptions) {
|
|||
NightLightCalculatorOptions expected_node_options;
|
||||
expected_node_options.add_num_lights(8);
|
||||
expected_node.add_node_options()->PackFrom(expected_node_options);
|
||||
*expected_node.add_option_value() = "num_lights:options/chain_length";
|
||||
EXPECT_THAT(actual_node, EqualsProto(expected_node));
|
||||
|
||||
MP_EXPECT_OK(graph.StartRun({}));
|
||||
|
|
|
@ -656,7 +656,6 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
|
|||
chain_length: 3
|
||||
}
|
||||
}
|
||||
option_value: "chain_length:options/chain_length"
|
||||
}
|
||||
type: "MoonSubgraph"
|
||||
graph_options {
|
||||
|
@ -666,5 +665,84 @@ TEST(SubgraphExpansionTest, SimpleSubgraphOptionsUsage) {
|
|||
EXPECT_THAT(moon_subgraph, mediapipe::EqualsProto(expected_graph));
|
||||
}
|
||||
|
||||
// Shows ExpandSubgraphs applied twice. "option_value" fields are evaluated
|
||||
// and removed on the first ExpandSubgraphs call. If "option_value" fields
|
||||
// are not removed during ExpandSubgraphs, they evaluate incorrectly on the
|
||||
// second ExpandSubgraphs call and this test fails on "expected_node_options".
|
||||
TEST(SubgraphExpansionTest, SimpleSubgraphOptionsTwice) {
|
||||
GraphRegistry graph_registry;
|
||||
|
||||
// Register a simple-subgraph that accepts graph options.
|
||||
auto moon_subgraph =
|
||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
type: "MoonSubgraph"
|
||||
graph_options: {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
node: {
|
||||
calculator: "MoonCalculator"
|
||||
node_options: {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
option_value: "chain_length:options/chain_length"
|
||||
}
|
||||
)pb");
|
||||
graph_registry.Register("MoonSubgraph", moon_subgraph);
|
||||
|
||||
// Invoke the simple-subgraph with graph options.
|
||||
// The empty NodeChainSubgraphOptions below allows "option_value" fields
|
||||
// on "MoonCalculator" to evaluate incorrectly, if not removed.
|
||||
auto sky_graph = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
graph_options: {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
node: {
|
||||
calculator: "MoonSubgraph"
|
||||
options: {
|
||||
[mediapipe.NodeChainSubgraphOptions.ext] {
|
||||
node_type: "DoubleIntCalculator"
|
||||
chain_length: 3
|
||||
}
|
||||
}
|
||||
}
|
||||
)pb");
|
||||
|
||||
// The first ExpandSubgraphs call evaluates and removes "option_value" fields.
|
||||
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
|
||||
auto expanded_1 = sky_graph;
|
||||
|
||||
// The second ExpandSubgraphs call has no effect on the expanded graph.
|
||||
MP_ASSERT_OK(tool::ExpandSubgraphs(&sky_graph, &graph_registry));
|
||||
|
||||
// Validate the expected node_options for the "MoonSubgraph".
|
||||
// If the "option_value" fields are not removed during ExpandSubgraphs,
|
||||
// this test fails with an incorrect value for "chain_length".
|
||||
auto expected_node_options =
|
||||
mediapipe::ParseTextProtoOrDie<mediapipe::NodeChainSubgraphOptions>(
|
||||
"chain_length: 3");
|
||||
mediapipe::NodeChainSubgraphOptions node_options;
|
||||
sky_graph.node(0).node_options(0).UnpackTo(&node_options);
|
||||
ASSERT_THAT(node_options, mediapipe::EqualsProto(expected_node_options));
|
||||
|
||||
// Validate the results from both ExpandSubgraphs() calls.
|
||||
CalculatorGraphConfig expected_graph =
|
||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
graph_options {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {}
|
||||
}
|
||||
node {
|
||||
name: "moonsubgraph__MoonCalculator"
|
||||
calculator: "MoonCalculator"
|
||||
node_options {
|
||||
[type.googleapis.com/mediapipe.NodeChainSubgraphOptions] {
|
||||
chain_length: 3
|
||||
}
|
||||
}
|
||||
}
|
||||
)pb");
|
||||
EXPECT_THAT(expanded_1, mediapipe::EqualsProto(expected_graph));
|
||||
EXPECT_THAT(sky_graph, mediapipe::EqualsProto(expected_graph));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -112,13 +112,13 @@ GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
|
|||
|
||||
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
|
||||
const GpuBuffer& gpu_buffer, int plane) {
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(plane));
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(plane));
|
||||
}
|
||||
|
||||
GlTexture GlCalculatorHelperImpl::CreateSourceTexture(
|
||||
const ImageFrame& image_frame) {
|
||||
auto gpu_buffer = GpuBuffer::CopyingImageFrame(image_frame);
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureReadView(0));
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetReadView<GlTextureView>(0));
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -149,7 +149,7 @@ GlTexture GlCalculatorHelperImpl::CreateDestinationTexture(
|
|||
|
||||
GpuBuffer gpu_buffer =
|
||||
gpu_resources_.gpu_buffer_pool().GetBuffer(width, height, format);
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetGlTextureWriteView(0));
|
||||
return MapGpuBuffer(gpu_buffer, gpu_buffer.GetWriteView<GlTextureView>(0));
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -224,7 +224,8 @@ void GlTextureBuffer::WaitForConsumersOnGpu() {
|
|||
// precisely, on only one GL context.
|
||||
}
|
||||
|
||||
GlTextureView GlTextureBuffer::GetGlTextureReadView(
|
||||
GlTextureView GlTextureBuffer::GetReadView(
|
||||
mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
|
||||
auto gl_context = GlContext::GetCurrent();
|
||||
CHECK(gl_context);
|
||||
|
@ -241,7 +242,8 @@ GlTextureView GlTextureBuffer::GetGlTextureReadView(
|
|||
nullptr);
|
||||
}
|
||||
|
||||
GlTextureView GlTextureBuffer::GetGlTextureWriteView(
|
||||
GlTextureView GlTextureBuffer::GetWriteView(
|
||||
mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
|
||||
auto gl_context = GlContext::GetCurrent();
|
||||
CHECK(gl_context);
|
||||
|
@ -341,7 +343,8 @@ std::unique_ptr<ImageFrame> GlTextureBuffer::AsImageFrame() const {
|
|||
ImageFormat::Format image_format = ImageFormatForGpuBufferFormat(format());
|
||||
auto output = absl::make_unique<ImageFrame>(
|
||||
image_format, width(), height(), ImageFrame::kGlDefaultAlignmentBoundary);
|
||||
auto view = GetGlTextureReadView(nullptr, 0);
|
||||
auto view =
|
||||
GetReadView(mediapipe::internal::types<GlTextureView>{}, nullptr, 0);
|
||||
ReadTexture(view, format(), output->MutablePixelData(),
|
||||
output->PixelDataSize());
|
||||
return output;
|
||||
|
|
|
@ -32,7 +32,9 @@ namespace mediapipe {
|
|||
class GlCalculatorHelperImpl;
|
||||
|
||||
// Implements a GPU memory buffer as an OpenGL texture. For internal use.
|
||||
class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
|
||||
class GlTextureBuffer
|
||||
: public mediapipe::internal::GpuBufferStorageImpl<
|
||||
GlTextureBuffer, mediapipe::internal::ViewProvider<GlTextureView>> {
|
||||
public:
|
||||
// This is called when the texture buffer is deleted. It is passed a sync
|
||||
// token created at that time on the GlContext. If the GlTextureBuffer has
|
||||
|
@ -86,11 +88,12 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
|
|||
int height() const { return height_; }
|
||||
GpuBufferFormat format() const { return format_; }
|
||||
|
||||
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) const override;
|
||||
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) override;
|
||||
void ViewDoneWriting(const GlTextureView& view) override;
|
||||
std::unique_ptr<ImageFrame> AsImageFrame() const override;
|
||||
|
||||
// If this texture is going to be used outside of the context that produced
|
||||
|
@ -142,6 +145,8 @@ class GlTextureBuffer : public mediapipe::internal::GpuBufferStorage {
|
|||
// Returns true on success.
|
||||
bool CreateInternal(const void* data, int alignment = 4);
|
||||
|
||||
void ViewDoneWriting(const GlTextureView& view);
|
||||
|
||||
friend class GlCalculatorHelperImpl;
|
||||
|
||||
GLuint name_ = 0;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#ifndef MEDIAPIPE_GPU_GPU_BUFFER_H_
|
||||
#define MEDIAPIPE_GPU_GPU_BUFFER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
|
@ -23,6 +24,10 @@
|
|||
#include "mediapipe/gpu/gpu_buffer_format.h"
|
||||
#include "mediapipe/gpu/gpu_buffer_storage.h"
|
||||
|
||||
// Note: these headers are needed for the legacy storage APIs. Do not add more
|
||||
// storage-specific headers here. See WebGpuTextureBuffer/View for an example
|
||||
// of adding a new storage and view.
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <CoreVideo/CoreVideo.h>
|
||||
|
||||
|
@ -31,9 +36,7 @@
|
|||
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
#include "mediapipe/gpu/gpu_buffer_storage_cv_pixel_buffer.h"
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
|
||||
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
#else
|
||||
#include "mediapipe/gpu/gl_texture_buffer.h"
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
|
||||
|
@ -60,19 +63,28 @@ class GpuBuffer {
|
|||
// are not portable. Applications and calculators should normally obtain
|
||||
// GpuBuffers in a portable way from the framework, e.g. using
|
||||
// GpuBufferMultiPool.
|
||||
explicit GpuBuffer(
|
||||
std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage)
|
||||
: storage_(std::move(storage)) {}
|
||||
|
||||
// Note: these constructors and accessors for specific storage types exist
|
||||
// for backwards compatibility reasons. Do not add new ones.
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
explicit GpuBuffer(CFHolder<CVPixelBufferRef> pixel_buffer)
|
||||
: pixel_buffer_(std::move(pixel_buffer)) {}
|
||||
: storage_(std::make_shared<GpuBufferStorageCvPixelBuffer>(
|
||||
std::move(pixel_buffer))) {}
|
||||
explicit GpuBuffer(CVPixelBufferRef pixel_buffer)
|
||||
: pixel_buffer_(pixel_buffer) {}
|
||||
: storage_(
|
||||
std::make_shared<GpuBufferStorageCvPixelBuffer>(pixel_buffer)) {}
|
||||
|
||||
CVPixelBufferRef GetCVPixelBufferRef() const { return *pixel_buffer_; }
|
||||
CVPixelBufferRef GetCVPixelBufferRef() const {
|
||||
auto p = storage_->down_cast<GpuBufferStorageCvPixelBuffer>();
|
||||
if (p) return **p;
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
explicit GpuBuffer(GlTextureBufferSharedPtr texture_buffer)
|
||||
: texture_buffer_(std::move(texture_buffer)) {}
|
||||
|
||||
const GlTextureBufferSharedPtr& GetGlTextureBufferSharedPtr() const {
|
||||
return texture_buffer_;
|
||||
GlTextureBufferSharedPtr GetGlTextureBufferSharedPtr() const {
|
||||
return internal_storage<GlTextureBuffer>();
|
||||
}
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
|
||||
|
@@ -93,14 +105,26 @@ class GpuBuffer {
   // Allow assignment from nullptr.
   GpuBuffer& operator=(std::nullptr_t other);
 
-  GlTextureView GetGlTextureReadView(int plane) const {
-    return current_storage().GetGlTextureReadView(
-        std::make_shared<GpuBuffer>(*this), plane);
+  // Gets a read view of the specified type. The arguments depend on the
+  // specific view type; see the corresponding ViewProvider.
+  template <class View, class... Args>
+  auto GetReadView(Args... args) const {
+    return current_storage()
+        .down_cast<mediapipe::internal::ViewProvider<View>>()
+        ->GetReadView(mediapipe::internal::types<View>{},
+                      std::make_shared<GpuBuffer>(*this),
+                      std::forward<Args>(args)...);
   }
 
-  GlTextureView GetGlTextureWriteView(int plane) {
-    return current_storage().GetGlTextureWriteView(
-        std::make_shared<GpuBuffer>(*this), plane);
+  // Gets a write view of the specified type. The arguments depend on the
+  // specific view type; see the corresponding ViewProvider.
+  template <class View, class... Args>
+  auto GetWriteView(Args... args) {
+    return current_storage()
+        .down_cast<mediapipe::internal::ViewProvider<View>>()
+        ->GetWriteView(mediapipe::internal::types<View>{},
+                       std::make_shared<GpuBuffer>(*this),
+                       std::forward<Args>(args)...);
   }
 
   // Make a GpuBuffer copying the data from an ImageFrame.
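
For orientation, a minimal usage sketch of the new templated accessors. It assumes a GpuBuffer whose storage supports `ViewProvider<GlTextureView>` and a current GL context; the surrounding calculator plumbing and the actual GL copy calls are omitted.

```cpp
#include "mediapipe/gpu/gpu_buffer.h"

// Sketch only: the view type is chosen via the template argument, and the
// remaining arguments (here the plane index) are forwarded to the storage's
// ViewProvider implementation.
void CopyTextureSketch(const mediapipe::GpuBuffer& input,
                       mediapipe::GpuBuffer& output) {
  auto src = input.GetReadView<mediapipe::GlTextureView>(/*plane=*/0);
  auto dst = output.GetWriteView<mediapipe::GlTextureView>(/*plane=*/0);
  // src.name() / dst.name() expose the underlying GL texture ids; the GL
  // blit or draw calls that would use them are omitted from this sketch.
  (void)src;
  (void)dst;
}
```
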
@ -115,77 +139,57 @@ class GpuBuffer {
|
|||
return current_storage().AsImageFrame();
|
||||
}
|
||||
|
||||
// Attempts to access an underlying storage object of the specified type.
|
||||
// This method is meant for internal use: user code should access the contents
|
||||
// using views.
|
||||
template <class T>
|
||||
std::shared_ptr<T> internal_storage() const {
|
||||
if (storage_->down_cast<T>()) return std::static_pointer_cast<T>(storage_);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
class PlaceholderGpuBufferStorage
|
||||
: public mediapipe::internal::GpuBufferStorage {
|
||||
: public mediapipe::internal::GpuBufferStorageImpl<
|
||||
PlaceholderGpuBufferStorage> {
|
||||
public:
|
||||
int width() const override { return 0; }
|
||||
int height() const override { return 0; }
|
||||
virtual GpuBufferFormat format() const override {
|
||||
return GpuBufferFormat::kUnknown;
|
||||
}
|
||||
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) const override {
|
||||
return {};
|
||||
}
|
||||
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) override {
|
||||
return {};
|
||||
}
|
||||
void ViewDoneWriting(const GlTextureView& view) override{};
|
||||
std::unique_ptr<ImageFrame> AsImageFrame() const override {
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
mediapipe::internal::GpuBufferStorage& no_storage() const {
|
||||
static PlaceholderGpuBufferStorage placeholder;
|
||||
std::shared_ptr<mediapipe::internal::GpuBufferStorage>& no_storage() const {
|
||||
static auto placeholder =
|
||||
std::static_pointer_cast<mediapipe::internal::GpuBufferStorage>(
|
||||
std::make_shared<PlaceholderGpuBufferStorage>());
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
const mediapipe::internal::GpuBufferStorage& current_storage() const {
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
if (pixel_buffer_ != nullptr) return pixel_buffer_;
|
||||
#else
|
||||
if (texture_buffer_) return *texture_buffer_;
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
return no_storage();
|
||||
return *storage_;
|
||||
}
|
||||
|
||||
mediapipe::internal::GpuBufferStorage& current_storage() {
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
if (pixel_buffer_ != nullptr) return pixel_buffer_;
|
||||
#else
|
||||
if (texture_buffer_) return *texture_buffer_;
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
return no_storage();
|
||||
}
|
||||
mediapipe::internal::GpuBufferStorage& current_storage() { return *storage_; }
|
||||
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
GpuBufferStorageCvPixelBuffer pixel_buffer_;
|
||||
#else
|
||||
GlTextureBufferSharedPtr texture_buffer_;
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
std::shared_ptr<mediapipe::internal::GpuBufferStorage> storage_ =
|
||||
no_storage();
|
||||
};
|
||||
|
||||
inline bool GpuBuffer::operator==(std::nullptr_t other) const {
|
||||
return ¤t_storage() == &no_storage();
|
||||
return storage_ == no_storage();
|
||||
}
|
||||
|
||||
inline bool GpuBuffer::operator==(const GpuBuffer& other) const {
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
return pixel_buffer_ == other.pixel_buffer_;
|
||||
#else
|
||||
return texture_buffer_ == other.texture_buffer_;
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
return storage_ == other.storage_;
|
||||
}
|
||||
|
||||
inline GpuBuffer& GpuBuffer::operator=(std::nullptr_t other) {
|
||||
#if MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
pixel_buffer_.reset(other);
|
||||
#else
|
||||
texture_buffer_ = other;
|
||||
#endif // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
|
||||
storage_ = no_storage();
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
|
|
@@ -12,27 +12,73 @@ class GpuBuffer;
 namespace mediapipe {
 namespace internal {
 
 using mediapipe::GlTextureView;
 using mediapipe::GpuBuffer;
 using mediapipe::GpuBufferFormat;
 
-class GlTextureViewManager {
+template <class... T>
+struct types {};
+
+template <class V>
+class ViewProvider;
+
+// Note: this specialization temporarily lives here for backwards compatibility
+// reasons. New specializations should be put in the same file as their view.
+template <>
+class ViewProvider<GlTextureView> {
  public:
-  virtual ~GlTextureViewManager() = default;
-  virtual GlTextureView GetGlTextureReadView(
-      std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const = 0;
-  virtual GlTextureView GetGlTextureWriteView(
-      std::shared_ptr<GpuBuffer> gpu_buffer, int plane) = 0;
-  virtual void ViewDoneWriting(const GlTextureView& view) = 0;
+  virtual ~ViewProvider() = default;
+  // Note that the view type is encoded in an argument to allow overloading,
+  // so a storage class can implement GetRead/WriteView for multiple view types.
+  // We cannot use a template function because it cannot be virtual; we want to
+  // have a virtual function here to enforce that different storages supporting
+  // the same view implement the same signature.
+  // Note that we allow different views to have custom signatures, providing
+  // additional view-specific arguments that may be needed.
+  virtual GlTextureView GetReadView(types<GlTextureView>,
+                                    std::shared_ptr<GpuBuffer> gpu_buffer,
+                                    int plane) const = 0;
+  virtual GlTextureView GetWriteView(types<GlTextureView>,
+                                     std::shared_ptr<GpuBuffer> gpu_buffer,
+                                     int plane) = 0;
 };
 
-class GpuBufferStorage : public GlTextureViewManager {
+class GpuBufferStorage {
  public:
   virtual ~GpuBufferStorage() = default;
   virtual int width() const = 0;
   virtual int height() const = 0;
   virtual GpuBufferFormat format() const = 0;
   virtual std::unique_ptr<ImageFrame> AsImageFrame() const = 0;
+  // We can't use dynamic_cast since we want to support building without RTTI.
+  // The public methods delegate to the type-erased private virtual method.
+  template <class T>
+  T* down_cast() {
+    return static_cast<T*>(
+        const_cast<void*>(down_cast(tool::GetTypeHash<T>())));
+  }
+  template <class T>
+  const T* down_cast() const {
+    return static_cast<const T*>(down_cast(tool::GetTypeHash<T>()));
+  }
+
+ private:
+  virtual const void* down_cast(size_t type_hash) const = 0;
+  virtual size_t storage_type_hash() const = 0;
+};
+
+template <class T, class... U>
+class GpuBufferStorageImpl : public GpuBufferStorage, public U... {
+ private:
+  virtual const void* down_cast(size_t type_hash) const override {
+    return down_cast_impl(type_hash, types<T, U...>{});
+  }
+  size_t storage_type_hash() const override { return tool::GetTypeHash<T>(); }
+
+  const void* down_cast_impl(size_t type_hash, types<>) const {
+    return nullptr;
+  }
+  template <class V, class... W>
+  const void* down_cast_impl(size_t type_hash, types<V, W...>) const {
+    if (type_hash == tool::GetTypeHash<V>()) return static_cast<const V*>(this);
+    return down_cast_impl(type_hash, types<W...>{});
+  }
 };
 
 }  // namespace internal
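
To see how the pieces fit together, here is a hedged sketch of what a new storage type might look like under this scheme. `MyRgbaHostStorage` and its members are hypothetical; only the base classes, the `ViewProvider<GlTextureView>` signatures, and the tag-dispatch convention come from the header above, and the GL upload itself is omitted.

```cpp
// Sketch only. A storage lists the view interfaces it supports as extra
// template arguments to GpuBufferStorageImpl; GpuBuffer::GetReadView<V>() then
// locates them at runtime via the RTTI-free down_cast().
#include <cstdint>
#include <memory>
#include <vector>

#include "mediapipe/gpu/gpu_buffer.h"  // headers simplified for the sketch

namespace mediapipe {

class MyRgbaHostStorage
    : public internal::GpuBufferStorageImpl<
          MyRgbaHostStorage, internal::ViewProvider<GlTextureView>> {
 public:
  MyRgbaHostStorage(int width, int height)
      : width_(width), height_(height), data_(width * height * 4) {}

  int width() const override { return width_; }
  int height() const override { return height_; }
  GpuBufferFormat format() const override { return GpuBufferFormat::kBGRA32; }
  std::unique_ptr<ImageFrame> AsImageFrame() const override { return nullptr; }

  // Tag dispatch: the types<GlTextureView> argument selects this overload, so
  // the same class could also implement ViewProvider<SomeOtherView>.
  GlTextureView GetReadView(internal::types<GlTextureView>,
                            std::shared_ptr<GpuBuffer> gpu_buffer,
                            int plane) const override {
    // Upload data_ to a texture and wrap it in a GlTextureView (omitted).
    return {};
  }
  GlTextureView GetWriteView(internal::types<GlTextureView>,
                             std::shared_ptr<GpuBuffer> gpu_buffer,
                             int plane) override {
    return {};
  }

 private:
  int width_;
  int height_;
  std::vector<uint8_t> data_;  // hypothetical host-side RGBA pixels
};

}  // namespace mediapipe
```

A `GpuBuffer` wrapping this storage would answer `GetReadView<GlTextureView>(plane)` by down-casting to `ViewProvider<GlTextureView>` and calling the overload above.
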
@ -11,7 +11,8 @@ typedef CVOpenGLTextureRef CVTextureType;
|
|||
typedef CVOpenGLESTextureRef CVTextureType;
|
||||
#endif // TARGET_OS_OSX
|
||||
|
||||
GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
|
||||
GlTextureView GpuBufferStorageCvPixelBuffer::GetReadView(
|
||||
mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) const {
|
||||
CVReturn err;
|
||||
auto gl_context = GlContext::GetCurrent();
|
||||
|
@ -58,11 +59,13 @@ GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureReadView(
|
|||
#endif // TARGET_OS_OSX
|
||||
}
|
||||
|
||||
GlTextureView GpuBufferStorageCvPixelBuffer::GetGlTextureWriteView(
|
||||
GlTextureView GpuBufferStorageCvPixelBuffer::GetWriteView(
|
||||
mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer, int plane) {
|
||||
// For this storage there is currently no difference between read and write
|
||||
// views, so we delegate to the read method.
|
||||
return GetGlTextureReadView(std::move(gpu_buffer), plane);
|
||||
return GetReadView(mediapipe::internal::types<GlTextureView>{},
|
||||
std::move(gpu_buffer), plane);
|
||||
}
|
||||
|
||||
void GpuBufferStorageCvPixelBuffer::ViewDoneWriting(const GlTextureView& view) {
|
||||
|
|
|
@ -12,7 +12,9 @@ namespace mediapipe {
|
|||
class GlContext;
|
||||
|
||||
class GpuBufferStorageCvPixelBuffer
|
||||
: public mediapipe::internal::GpuBufferStorage,
|
||||
: public mediapipe::internal::GpuBufferStorageImpl<
|
||||
GpuBufferStorageCvPixelBuffer,
|
||||
mediapipe::internal::ViewProvider<GlTextureView>>,
|
||||
public CFHolder<CVPixelBufferRef> {
|
||||
public:
|
||||
using CFHolder<CVPixelBufferRef>::CFHolder;
|
||||
|
@ -28,12 +30,16 @@ class GpuBufferStorageCvPixelBuffer
|
|||
return GpuBufferFormatForCVPixelFormat(
|
||||
CVPixelBufferGetPixelFormatType(**this));
|
||||
}
|
||||
GlTextureView GetGlTextureReadView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
GlTextureView GetReadView(mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) const override;
|
||||
GlTextureView GetGlTextureWriteView(std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
GlTextureView GetWriteView(mediapipe::internal::types<GlTextureView>,
|
||||
std::shared_ptr<GpuBuffer> gpu_buffer,
|
||||
int plane) override;
|
||||
std::unique_ptr<ImageFrame> AsImageFrame() const override;
|
||||
void ViewDoneWriting(const GlTextureView& view) override;
|
||||
|
||||
private:
|
||||
void ViewDoneWriting(const GlTextureView& view);
|
||||
};
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -8,6 +8,9 @@ input_stream: "input_video"
|
|||
# Max number of hands to detect/process. (int)
|
||||
input_side_packet: "num_hands"
|
||||
|
||||
# Model complexity (0 or 1). (int)
|
||||
input_side_packet: "model_complexity"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
output_stream: "output_video"
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
|
@ -39,6 +42,7 @@ node {
|
|||
node {
|
||||
calculator: "HandLandmarkTrackingGpu"
|
||||
input_stream: "IMAGE:throttled_input_video"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
|
|
@ -370,6 +370,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
|
|||
jmethodID processMethod = env->GetMethodID(
|
||||
callback_cls, process_method_name.c_str(), "(Ljava/util/List;)V");
|
||||
|
||||
// TODO: move to register natives.
|
||||
jclass list_cls = env->FindClass("java/util/ArrayList");
|
||||
jobject java_list =
|
||||
env->NewObject(list_cls, env->GetMethodID(list_cls, "<init>", "()V"));
|
||||
|
@ -392,6 +393,7 @@ void Graph::CallbackToJava(JNIEnv* env, jobject java_callback_obj,
|
|||
RemovePacket(packet_handle);
|
||||
}
|
||||
env->DeleteLocalRef(callback_cls);
|
||||
env->DeleteLocalRef(list_cls);
|
||||
env->DeleteLocalRef(java_list);
|
||||
VLOG(2) << "Returned from java callback.";
|
||||
}
|
||||
|
|
|
@ -56,8 +56,11 @@ JNIEXPORT jobjectArray JNICALL GRAPH_PROFILER_METHOD(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// TODO: move to register natives.
|
||||
jclass byte_array_cls = env->FindClass("[B");
|
||||
jobjectArray profiles =
|
||||
env->NewObjectArray(num_profiles, env->FindClass("[B"), nullptr);
|
||||
env->NewObjectArray(num_profiles, byte_array_cls, nullptr);
|
||||
env->DeleteLocalRef(byte_array_cls);
|
||||
for (int i = 0; i < num_profiles; i++) {
|
||||
const auto& profile = profiles_vec[i];
|
||||
int size = profile.ByteSize();
|
||||
|
|
|
@@ -143,8 +143,10 @@ jthrowable CreateMediaPipeException(JNIEnv* env, absl::Status status) {
   env->SetByteArrayRegion(message_bytes, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
                               std::string(status.message()).c_str())));
-  return reinterpret_cast<jthrowable>(
+  jthrowable result = reinterpret_cast<jthrowable>(
       env->NewObject(status_cls, status_ctr, status.code(), message_bytes));
+  env->DeleteLocalRef(status_cls);
+  return result;
 }
 
 bool ThrowIfError(JNIEnv* env, absl::Status status) {

@@ -165,11 +167,11 @@ SerializedMessageIds::SerializedMessageIds(JNIEnv* env, jobject data) {
       class_registry.GetFieldName(serialized_message, "typeName");
   std::string value_obfuscated =
       class_registry.GetFieldName(serialized_message, "value");
-  jclass j_class = reinterpret_cast<jclass>(
-      env->NewGlobalRef(env->FindClass(serialized_message_obfuscated.c_str())));
+  jclass j_class = env->FindClass(serialized_message_obfuscated.c_str());
   type_name_id = env->GetFieldID(j_class, type_name_obfuscated.c_str(),
                                  "Ljava/lang/String;");
   value_id = env->GetFieldID(j_class, value_obfuscated.c_str(), "[B");
+  env->DeleteLocalRef(j_class);
 }
 
 }  // namespace android
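
The recurring pattern in these JNI hunks is releasing local references (`jclass`, `jobject`) as soon as they are no longer needed, so long-running native callbacks do not exhaust the JVM's local-reference table. A minimal sketch of one way to make that automatic; the `ScopedLocalRef` helper below is hypothetical and not part of MediaPipe.

```cpp
#include <jni.h>

// Hypothetical RAII wrapper: deletes the wrapped local reference when it goes
// out of scope, mirroring the explicit env->DeleteLocalRef calls added in
// this commit.
template <typename T>
class ScopedLocalRef {
 public:
  ScopedLocalRef(JNIEnv* env, T ref) : env_(env), ref_(ref) {}
  ~ScopedLocalRef() {
    if (ref_ != nullptr) env_->DeleteLocalRef(ref_);
  }
  T get() const { return ref_; }

 private:
  JNIEnv* env_;
  T ref_;  // a JNI local reference such as jclass or jobject
};

// Usage sketch: the class lookup is released automatically at scope exit.
void BuildByteArrayHolder(JNIEnv* env) {
  ScopedLocalRef<jclass> byte_array_cls(env, env->FindClass("[B"));
  jobjectArray arr = env->NewObjectArray(4, byte_array_cls.get(), nullptr);
  // ... fill arr ...
  (void)arr;
}
```
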
@ -184,8 +184,11 @@ JNIEXPORT jobjectArray JNICALL PACKET_GETTER_METHOD(nativeGetProtoVector)(
|
|||
}
|
||||
const std::vector<const ::mediapipe::proto_ns::MessageLite*>& proto_vector =
|
||||
get_proto_vector.value();
|
||||
// TODO: move to register natives.
|
||||
jclass byte_array_cls = env->FindClass("[B");
|
||||
jobjectArray proto_array =
|
||||
env->NewObjectArray(proto_vector.size(), env->FindClass("[B"), nullptr);
|
||||
env->NewObjectArray(proto_vector.size(), byte_array_cls, nullptr);
|
||||
env->DeleteLocalRef(byte_array_cls);
|
||||
for (int i = 0; i < proto_vector.size(); ++i) {
|
||||
const ::mediapipe::proto_ns::MessageLite* proto_message = proto_vector[i];
|
||||
|
||||
|
|
|
@ -137,6 +137,7 @@ void RegisterGraphNatives(JNIEnv *env) {
|
|||
AddJNINativeMethod(&graph_methods, graph, "nativeGetProfiler", "(J)J",
|
||||
(void *)&GRAPH_METHOD(nativeGetProfiler));
|
||||
RegisterNativesVector(env, graph_class, graph_methods);
|
||||
env->DeleteLocalRef(graph_class);
|
||||
}
|
||||
|
||||
void RegisterGraphProfilerNatives(JNIEnv *env) {
|
||||
|
@ -151,6 +152,7 @@ void RegisterGraphProfilerNatives(JNIEnv *env) {
|
|||
&graph_profiler_methods, graph_profiler, "nativeGetCalculatorProfiles",
|
||||
"(J)[[B", (void *)&GRAPH_PROFILER_METHOD(nativeGetCalculatorProfiles));
|
||||
RegisterNativesVector(env, graph_profiler_class, graph_profiler_methods);
|
||||
env->DeleteLocalRef(graph_profiler_class);
|
||||
}
|
||||
|
||||
void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
|
||||
|
@ -171,6 +173,7 @@ void RegisterAndroidAssetUtilNatives(JNIEnv *env) {
|
|||
(void *)&ANDROID_ASSET_UTIL_METHOD(nativeInitializeAssetManager));
|
||||
RegisterNativesVector(env, android_asset_util_class,
|
||||
android_asset_util_methods);
|
||||
env->DeleteLocalRef(android_asset_util_class);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -191,6 +194,7 @@ void RegisterAndroidPacketCreatorNatives(JNIEnv *env) {
|
|||
(void *)&ANDROID_PACKET_CREATOR_METHOD(nativeCreateRgbImageFrame));
|
||||
RegisterNativesVector(env, android_packet_creator_class,
|
||||
android_packet_creator_methods);
|
||||
env->DeleteLocalRef(android_packet_creator_class);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -232,6 +236,7 @@ void RegisterPacketCreatorNatives(JNIEnv *env) {
|
|||
"(JL" + serialized_message_name + ";)J",
|
||||
(void *)&PACKET_CREATOR_METHOD(nativeCreateProto));
|
||||
RegisterNativesVector(env, packet_creator_class, packet_creator_methods);
|
||||
env->DeleteLocalRef(packet_creator_class);
|
||||
}
|
||||
|
||||
void RegisterPacketGetterNatives(JNIEnv *env) {
|
||||
|
@ -260,6 +265,7 @@ void RegisterPacketGetterNatives(JNIEnv *env) {
|
|||
"nativeGetFloat32Vector", "(J)[F",
|
||||
(void *)&PACKET_GETTER_METHOD(nativeGetFloat32Vector));
|
||||
RegisterNativesVector(env, packet_getter_class, packet_getter_methods);
|
||||
env->DeleteLocalRef(packet_getter_class);
|
||||
}
|
||||
|
||||
void RegisterPacketNatives(JNIEnv *env) {
|
||||
|
@ -278,6 +284,7 @@ void RegisterPacketNatives(JNIEnv *env) {
|
|||
AddJNINativeMethod(&packet_methods, packet, "nativeIsEmpty", "(J)Z",
|
||||
(void *)&PACKET_METHOD(nativeIsEmpty));
|
||||
RegisterNativesVector(env, packet_class, packet_methods);
|
||||
env->DeleteLocalRef(packet_class);
|
||||
}
|
||||
|
||||
void RegisterCompatNatives(JNIEnv *env) {
|
||||
|
@ -293,6 +300,7 @@ void RegisterCompatNatives(JNIEnv *env) {
|
|||
"(I)J",
|
||||
(void *)&COMPAT_METHOD(getCurrentNativeEGLSurface));
|
||||
RegisterNativesVector(env, compat_class, compat_methods);
|
||||
env->DeleteLocalRef(compat_class);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@@ -95,13 +95,12 @@ public class ImageSolutionResult implements SolutionResult {
     }
   }
 
-  // Releases image packet and the underlying data.
-  void releaseImagePackets() {
-    imagePacket.release();
+  // Clears the underlying image packets to prevent the callers from accessing the invalid packets
+  // outside of the output callback method.
+  void clearImagePackets() {
+    imagePacket = null;
     if (imageResultPackets != null) {
-      for (Packet p : imageResultPackets) {
-        p.release();
-      }
+      imageResultPackets.clear();
     }
   }
 }
|
@@ -90,12 +90,9 @@ public class OutputHandler<T extends SolutionResult> {
         Log.e(TAG, "Error occurs when getting MediaPipe solution result. " + e);
       }
     } finally {
-      for (Packet packet : packets) {
-        packet.release();
-      }
       if (solutionResult instanceof ImageSolutionResult) {
         ImageSolutionResult imageSolutionResult = (ImageSolutionResult) solutionResult;
-        imageSolutionResult.releaseImagePackets();
+        imageSolutionResult.clearImagePackets();
       }
     }
   }
|
@ -34,7 +34,6 @@ android_library(
|
|||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework/formats:detection_java_proto_lite",
|
||||
"//mediapipe/framework/formats:location_data_java_proto_lite",
|
||||
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
|
||||
"//mediapipe/java/com/google/mediapipe/solutioncore:solution_base",
|
||||
"//third_party:autovalue",
|
||||
|
|
|
@ -17,7 +17,6 @@ package com.google.mediapipe.solutions.facedetection;
|
|||
import android.content.Context;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.mediapipe.formats.proto.DetectionProto.Detection;
|
||||
import com.google.mediapipe.formats.proto.LocationDataProto.LocationData.RelativeKeypoint;
|
||||
import com.google.mediapipe.framework.MediaPipeException;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
import com.google.mediapipe.solutioncore.ErrorListener;
|
||||
|
@ -104,27 +103,4 @@ public class FaceDetection extends ImageSolutionBase {
|
|||
this.outputHandler.setErrorListener(listener);
|
||||
this.errorListener = listener;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a specific face keypoint by face index and face keypoint type.
|
||||
*
|
||||
* @param result the returned {@link FaceDetectionResult} object.
|
||||
* @param faceIndex the face index. A smaller index maps to a detected face with a higher
|
||||
* confidence score.
|
||||
* @param faceKeypointType the face keypoint type defined in {@link FaceKeypoint}.
|
||||
*/
|
||||
public static RelativeKeypoint getFaceKeypoint(
|
||||
FaceDetectionResult result,
|
||||
int faceIndex,
|
||||
@FaceKeypoint.FaceKeypointType int faceKeypointType) {
|
||||
if (result == null
|
||||
|| faceIndex >= result.multiFaceDetections().size()
|
||||
|| faceKeypointType >= FaceKeypoint.NUM_KEY_POINTS) {
|
||||
return RelativeKeypoint.getDefaultInstance();
|
||||
}
|
||||
Detection detection = result.multiFaceDetections().get(faceIndex);
|
||||
float x = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getX();
|
||||
float y = detection.getLocationData().getRelativeKeypoints(faceKeypointType).getY();
|
||||
return RelativeKeypoint.newBuilder().setX(x).setY(y).build();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,12 +23,13 @@ android_library(
|
|||
"HandsResult.java",
|
||||
],
|
||||
assets = [
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_lite.tflite",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_cpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_tracking_gpu_image.binarypb",
|
||||
"//mediapipe/modules/hand_landmark:handedness.txt",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_full.tflite",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_lite.tflite",
|
||||
],
|
||||
assets_dir = "",
|
||||
javacopts = ["-Acom.google.auto.value.AutoBuilderIsUnstable"],
|
||||
|
|
|
@ -18,9 +18,10 @@ import android.content.Context;
|
|||
import com.google.auto.value.AutoValue;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
|
||||
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
|
||||
import com.google.mediapipe.formats.proto.ClassificationProto.ClassificationList;
|
||||
import com.google.mediapipe.framework.MediaPipeException;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
import com.google.mediapipe.solutioncore.ErrorListener;
|
||||
|
@ -28,7 +29,9 @@ import com.google.mediapipe.solutioncore.ImageSolutionBase;
|
|||
import com.google.mediapipe.solutioncore.OutputHandler;
|
||||
import com.google.mediapipe.solutioncore.ResultListener;
|
||||
import com.google.mediapipe.solutioncore.SolutionInfo;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
|
@@ -85,10 +88,15 @@ public class Hands extends ImageSolutionBase {
   private static final String CPU_GRAPH_NAME = "hand_landmark_tracking_cpu_image.binarypb";
   private static final String IMAGE_INPUT_STREAM = "image";
   private static final ImmutableList<String> OUTPUT_STREAMS =
-      ImmutableList.of("multi_hand_landmarks", "multi_handedness", "throttled_image");
+      ImmutableList.of(
+          "multi_hand_landmarks",
+          "multi_hand_world_landmarks",
+          "multi_handedness",
+          "throttled_image");
   private static final int LANDMARKS_INDEX = 0;
-  private static final int HANDEDNESS_INDEX = 1;
-  private static final int INPUT_IMAGE_INDEX = 2;
+  private static final int WORLD_LANDMARKS_INDEX = 1;
+  private static final int HANDEDNESS_INDEX = 2;
+  private static final int INPUT_IMAGE_INDEX = 3;
   private final OutputHandler<HandsResult> outputHandler;
 
   /**

@@ -109,8 +117,18 @@ public class Hands extends ImageSolutionBase {
             reportError("Error occurs while getting MediaPipe hand landmarks.", e);
           }
           try {
-            handsResultBuilder.setMultiHandedness(
-                getProtoVector(packets.get(HANDEDNESS_INDEX), Classification.parser()));
+            handsResultBuilder.setMultiHandWorldLandmarks(
+                getProtoVector(packets.get(WORLD_LANDMARKS_INDEX), LandmarkList.parser()));
+          } catch (MediaPipeException e) {
+            reportError("Error occurs while getting MediaPipe hand world landmarks.", e);
+          }
+          try {
+            List<Classification> handednessList = new ArrayList<>();
+            for (ClassificationList protolist :
+                getProtoVector(packets.get(HANDEDNESS_INDEX), ClassificationList.parser())) {
+              handednessList.add(protolist.getClassification(0));
+            }
+            handsResultBuilder.setMultiHandedness(handednessList);
           } catch (MediaPipeException e) {
             reportError("Error occurs while getting MediaPipe handedness data.", e);
           }
@ -155,21 +173,4 @@ public class Hands extends ImageSolutionBase {
|
|||
this.outputHandler.setErrorListener(listener);
|
||||
this.errorListener = listener;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a specific hand landmark by hand index and hand landmark type.
|
||||
*
|
||||
* @param result the returned {@link HandsResult} object.
|
||||
* @param handIndex the hand index. The hand landmark lists are sorted by the confidence score.
|
||||
* @param landmarkType the hand landmark type defined in {@link HandLandmark}.
|
||||
*/
|
||||
public static NormalizedLandmark getHandLandmark(
|
||||
HandsResult result, int handIndex, @HandLandmark.HandLandmarkType int landmarkType) {
|
||||
if (result == null
|
||||
|| handIndex >= result.multiHandLandmarks().size()
|
||||
|| landmarkType >= HandLandmark.NUM_LANDMARKS) {
|
||||
return NormalizedLandmark.getDefaultInstance();
|
||||
}
|
||||
return result.multiHandLandmarks().get(handIndex).getLandmarkList().get(landmarkType);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ package com.google.mediapipe.solutions.hands;
|
|||
import android.graphics.Bitmap;
|
||||
import com.google.auto.value.AutoBuilder;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.LandmarkList;
|
||||
import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList;
|
||||
import com.google.mediapipe.formats.proto.ClassificationProto.Classification;
|
||||
import com.google.mediapipe.framework.Packet;
|
||||
|
@ -31,14 +32,17 @@ import java.util.List;
|
|||
*/
|
||||
public class HandsResult extends ImageSolutionResult {
|
||||
private final ImmutableList<NormalizedLandmarkList> multiHandLandmarks;
|
||||
private final ImmutableList<LandmarkList> multiHandWorldLandmarks;
|
||||
private final ImmutableList<Classification> multiHandedness;
|
||||
|
||||
HandsResult(
|
||||
ImmutableList<NormalizedLandmarkList> multiHandLandmarks,
|
||||
ImmutableList<LandmarkList> multiHandWorldLandmarks,
|
||||
ImmutableList<Classification> multiHandedness,
|
||||
Packet imagePacket,
|
||||
long timestamp) {
|
||||
this.multiHandLandmarks = multiHandLandmarks;
|
||||
this.multiHandWorldLandmarks = multiHandWorldLandmarks;
|
||||
this.multiHandedness = multiHandedness;
|
||||
this.timestamp = timestamp;
|
||||
this.imagePacket = imagePacket;
|
||||
|
@ -53,6 +57,12 @@ public class HandsResult extends ImageSolutionResult {
|
|||
return multiHandLandmarks;
|
||||
}
|
||||
|
||||
// Collection of detected/tracked hands' landmarks in real-world 3D coordinates that are in meters
|
||||
// with the origin at the hand's approximate geometric center.
|
||||
public ImmutableList<LandmarkList> multiHandWorldLandmarks() {
|
||||
return multiHandWorldLandmarks;
|
||||
}
|
||||
|
||||
// Collection of handedness of the detected/tracked hands (i.e. is it a left or right hand). Each
|
||||
// hand is composed of label and score. label is a string of value either "Left" or "Right". score
|
||||
// is the estimated probability of the predicted handedness and is always greater than or equal to
|
||||
|
@ -70,6 +80,8 @@ public class HandsResult extends ImageSolutionResult {
|
|||
public abstract static class Builder {
|
||||
abstract Builder setMultiHandLandmarks(List<NormalizedLandmarkList> value);
|
||||
|
||||
abstract Builder setMultiHandWorldLandmarks(List<LandmarkList> value);
|
||||
|
||||
abstract Builder setMultiHandedness(List<Classification> value);
|
||||
|
||||
abstract Builder setTimestamp(long value);
|
||||
|
|
|
@ -24,7 +24,6 @@ package(default_visibility = ["//visibility:public"])
|
|||
exports_files([
|
||||
"hand_landmark_full.tflite",
|
||||
"hand_landmark_lite.tflite",
|
||||
"hand_landmark_sparse.tflite",
|
||||
"handedness.txt",
|
||||
])
|
||||
|
||||
|
@ -56,6 +55,7 @@ mediapipe_simple_subgraph(
|
|||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/calculators/util:world_landmark_projection_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -75,6 +75,7 @@ mediapipe_simple_subgraph(
|
|||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/calculators/util:world_landmark_projection_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
@@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
 # the absence of this packet so that they don't wait for it unnecessarily.
 output_stream: "LANDMARKS:hand_landmarks"
 
+# Hand world landmarks within the given ROI. (LandmarkList)
+# World landmarks are real-world 3D coordinates in meters with the origin in the
+# center of the given ROI.
+#
+# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
+# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
+# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
+# the 3D object itself.
+output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
+
 # Handedness of the detected hand (i.e. is hand left or right).
 # (ClassificationList)
 output_stream: "HANDEDNESS:handedness"
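
To make the LANDMARKS vs. WORLD_LANDMARKS distinction concrete, here is a small sketch (not from this commit) that measures a hand span from the world landmarks. It assumes the usual 21-point hand topology in which index 0 is the wrist and index 8 is the index fingertip.

```cpp
#include <cmath>

#include "mediapipe/framework/formats/landmark.pb.h"

// Returns the wrist-to-index-fingertip distance in meters, using the
// LandmarkList emitted on WORLD_LANDMARKS. The same computation on the
// NormalizedLandmarkList from LANDMARKS would instead yield a value in
// normalized image coordinates, which depends on camera distance and framing.
float HandSpanMeters(const mediapipe::LandmarkList& world_landmarks) {
  const auto& wrist = world_landmarks.landmark(0);      // assumed: wrist
  const auto& index_tip = world_landmarks.landmark(8);  // assumed: index tip
  const float dx = index_tip.x() - wrist.x();
  const float dy = index_tip.y() - wrist.y();
  const float dz = index_tip.z() - wrist.z();
  return std::sqrt(dx * dx + dy * dy + dz * dz);
}
```
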
@ -77,11 +87,13 @@ node {
|
|||
output_stream: "landmark_tensors"
|
||||
output_stream: "hand_flag_tensor"
|
||||
output_stream: "handedness_tensor"
|
||||
output_stream: "world_landmark_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -175,3 +187,33 @@ node {
|
|||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "NORM_LANDMARKS:hand_landmarks"
|
||||
}
|
||||
|
||||
# Drops world landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "world_landmark_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_world_landmark_tensor"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_world_landmark_tensor"
|
||||
output_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the world landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "WorldLandmarkProjectionCalculator"
|
||||
input_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "LANDMARKS:hand_world_landmarks"
|
||||
}
|
||||
|
|
|
@ -20,6 +20,16 @@ input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
|||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
||||
# Hand world landmarks within the given ROI. (LandmarkList)
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the given ROI.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
|
||||
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (ClassificationList)
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
@ -71,11 +81,13 @@ node {
|
|||
output_stream: "landmark_tensors"
|
||||
output_stream: "hand_flag_tensor"
|
||||
output_stream: "handedness_tensor"
|
||||
output_stream: "world_landmark_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -169,3 +181,33 @@ node {
|
|||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "NORM_LANDMARKS:hand_landmarks"
|
||||
}
|
||||
|
||||
# Drops world landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "world_landmark_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_world_landmark_tensor"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_world_landmark_tensor"
|
||||
output_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the world landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "WorldLandmarkProjectionCalculator"
|
||||
input_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "LANDMARKS:hand_world_landmarks"
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -89,6 +103,7 @@ node {
|
|||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionCpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "DETECTIONS:all_palm_detections"
|
||||
}
|
||||
|
@ -186,12 +201,13 @@ node {
|
|||
input_stream: "IMAGE:image_for_landmarks"
|
||||
input_stream: "ROI:single_hand_rect"
|
||||
output_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:single_handedness"
|
||||
}
|
||||
|
||||
# Collects the handedness for each single hand into a vector. Upon
|
||||
# receiving the BATCH_END timestamp, outputs a vector of classification at the
|
||||
# BATCH_END timestamp.
|
||||
# Collects the handedness for each single hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopClassificationListCalculator"
|
||||
input_stream: "ITEM:single_handedness"
|
||||
|
@ -218,6 +234,16 @@ node {
|
|||
output_stream: "ITERABLE:multi_hand_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of world landmarks for each hand into a vector. Upon receiving
|
||||
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_world_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_world_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
|
|
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -25,6 +25,7 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
|
@ -32,8 +33,22 @@ output_stream: "IMAGE:throttled_image"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -93,6 +108,7 @@ node {
|
|||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
|
|
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -89,6 +103,7 @@ node {
|
|||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionGpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "DETECTIONS:all_palm_detections"
|
||||
}
|
||||
|
@ -187,12 +202,13 @@ node {
|
|||
input_stream: "IMAGE:image_for_landmarks"
|
||||
input_stream: "ROI:single_hand_rect"
|
||||
output_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:single_handedness"
|
||||
}
|
||||
|
||||
# Collects the handedness for each single hand into a vector. Upon
|
||||
# receiving the BATCH_END timestamp, outputs a vector of classification at the
|
||||
# BATCH_END timestamp.
|
||||
# Collects the handedness for each single hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopClassificationListCalculator"
|
||||
input_stream: "ITEM:single_handedness"
|
||||
|
@ -219,6 +235,16 @@ node {
|
|||
output_stream: "ITERABLE:multi_hand_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of world landmarks for each hand into a vector. Upon receiving
|
||||
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_world_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_world_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
|
|
|
@ -14,9 +14,9 @@ input_stream: "IMAGE:image"
|
|||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
|
@ -30,8 +30,22 @@ input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
|||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a Classification proto.
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
|
@ -93,6 +107,7 @@ node {
|
|||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
|
|
|
@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
|
|||
# Face-related pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
|
@ -72,5 +77,6 @@ node {
|
|||
calculator: "FaceLandmarkCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_tracking_roi"
|
||||
input_side_packet: "WITH_ATTENTION:refine_landmarks"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -8,6 +8,11 @@ input_stream: "IMAGE:input_video"
|
|||
# Face-related pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
|
@ -72,5 +77,6 @@ node {
|
|||
calculator: "FaceLandmarkGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_tracking_roi"
|
||||
input_side_packet: "WITH_ATTENTION:refine_landmarks"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
|
||||
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
||||
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
# output_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
# output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
|||
# jitter. If unspecified, functions as set to true. (bool)
|
||||
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
@ -135,5 +141,6 @@ node {
|
|||
calculator: "FaceLandmarksFromPoseCpu"
|
||||
input_stream: "IMAGE:image"
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
|
||||
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
||||
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
# output_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
# output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
@ -70,6 +71,11 @@ input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
|
|||
# jitter. If unspecified, functions as set to true. (bool)
|
||||
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
@ -135,5 +141,6 @@ node {
|
|||
calculator: "FaceLandmarksFromPoseGpu"
|
||||
input_stream: "IMAGE:image"
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
|
|
@ -21,13 +21,29 @@ licenses(["notice"])
|
|||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
exports_files(["palm_detection.tflite"])
|
||||
exports_files([
|
||||
"palm_detection_lite.tflite",
|
||||
"palm_detection_full.tflite",
|
||||
])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "palm_detection_model_loader",
|
||||
graph = "palm_detection_model_loader.pbtxt",
|
||||
register_as = "PalmDetectionModelLoader",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_model_calculator",
|
||||
"//mediapipe/calculators/util:local_file_contents_calculator",
|
||||
"//mediapipe/framework/tool:switch_container",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "palm_detection_cpu",
|
||||
graph = "palm_detection_cpu.pbtxt",
|
||||
register_as = "PalmDetectionCpu",
|
||||
deps = [
|
||||
":palm_detection_model_loader",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
|
||||
|
@ -43,6 +59,7 @@ mediapipe_simple_subgraph(
|
|||
graph = "palm_detection_gpu.pbtxt",
|
||||
register_as = "PalmDetectionGpu",
|
||||
deps = [
|
||||
":palm_detection_model_loader",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
|
||||
|
@ -52,10 +69,3 @@ mediapipe_simple_subgraph(
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
)

exports_files(
    srcs = [
        "palm_detection.tflite",
        "palm_detection_labelmap.txt",
    ],
)

Binary file not shown.
@ -5,6 +5,11 @@ type: "PalmDetectionCpu"
|
|||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
|
||||
# latency generally go up with the model complexity. If unspecified, functions
|
||||
# as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of palms detected. However, the MediaPipe
|
||||
|
@ -21,11 +26,11 @@ node {
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 128
      output_tensor_height: 128
      output_tensor_width: 192
      output_tensor_height: 192
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        min: 0.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
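Both palm detection graphs switch from a 128x128 input tensor in [-1, 1] to a 192x192 input tensor in [0, 1]. As a rough illustration of what the `output_tensor_float_range` change means for preprocessing, here is a minimal NumPy sketch; it assumes `ImageToTensorCalculator` maps uint8 pixels linearly into the configured range, and the helper below is illustrative rather than MediaPipe API.

```python
import numpy as np

def normalize(image_uint8: np.ndarray, out_min: float, out_max: float) -> np.ndarray:
  """Linearly maps uint8 pixels in [0, 255] into [out_min, out_max]."""
  scale = (out_max - out_min) / 255.0
  return image_uint8.astype(np.float32) * scale + out_min

frame = np.random.randint(0, 256, (192, 192, 3), dtype=np.uint8)
old_style = normalize(frame, -1.0, 1.0)  # previous range used by palm_detection.tflite
new_style = normalize(frame, 0.0, 1.0)   # range configured for the lite/full models
```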
@ -39,6 +44,13 @@ node {
  output_side_packet: "opresolver"
}

# Loads the palm detection TF Lite model.
node {
  calculator: "PalmDetectionModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
@ -47,9 +59,9 @@ node {
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:detection_tensors"
  input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
  input_side_packet: "MODEL:model"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
      delegate { xnnpack {} }
    }
  }
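With the hard-coded `model_path` replaced by the `MODEL` side packet from `PalmDetectionModelLoader`, the same graph can run either the lite or the full model. As a quick sanity check of the new input resolution, assuming the `.tflite` files are present at the paths used in this change, the TFLite Python interpreter can report the input shape:

```python
import tensorflow as tf

for name in ('palm_detection_lite.tflite', 'palm_detection_full.tflite'):
  interpreter = tf.lite.Interpreter(
      model_path=f'mediapipe/modules/palm_detection/{name}')
  interpreter.allocate_tensors()
  # Both models are expected to report a [1, 192, 192, 3] input tensor.
  print(name, interpreter.get_input_details()[0]['shape'])
```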
@ -65,8 +77,8 @@ node {
      num_layers: 4
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 128
      input_size_width: 128
      input_size_width: 192
      input_size_height: 192
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
@ -90,7 +102,7 @@ node {
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 896
      num_boxes: 2016
      num_coords: 18
      box_coord_offset: 0
      keypoint_coord_offset: 4
@ -100,10 +112,10 @@ node {
      score_clipping_thresh: 100.0
      reverse_output_order: true

      x_scale: 128.0
      y_scale: 128.0
      h_scale: 128.0
      w_scale: 128.0
      x_scale: 192.0
      y_scale: 192.0
      w_scale: 192.0
      h_scale: 192.0
      min_score_thresh: 0.5
    }
  }
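The jump from `num_boxes: 896` to `num_boxes: 2016` follows from the larger input: the SSD anchor grid is generated over 192x192 instead of 128x128. A small sketch of the arithmetic, assuming the usual palm detection anchor layout of strides 8, 16, 16, 16 with two anchors per cell (only the first stride is visible in this hunk):

```python
def anchor_count(input_size, strides=(8, 16, 16, 16), anchors_per_cell=2):
  """Counts SSD anchors on a square input, with anchors_per_cell per grid cell."""
  return sum(anchors_per_cell * (input_size // s) ** 2 for s in strides)

print(anchor_count(128))  # 896, the previous num_boxes
print(anchor_count(192))  # 2016, matching the updated num_boxes
```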
BIN mediapipe/modules/palm_detection/palm_detection_full.tflite (Executable file)
Binary file not shown.
@ -5,6 +5,11 @@ type: "PalmDetectionGpu"
|
|||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
|
||||
# latency generally go up with the model complexity. If unspecified, functions
|
||||
# as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of palms detected. However, the MediaPipe
|
||||
|
@ -21,11 +26,11 @@ node {
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 128
      output_tensor_height: 128
      output_tensor_width: 192
      output_tensor_height: 192
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        min: 0.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
@ -45,6 +50,13 @@ node {
  }
}

# Loads the palm detection TF Lite model.
node {
  calculator: "PalmDetectionModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
@ -53,10 +65,10 @@ node {
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:detection_tensors"
  input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
  input_side_packet: "MODEL:model"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/palm_detection/palm_detection.tflite"
      use_gpu: true
      delegate { gpu {} }
    }
  }
}
@ -71,8 +83,8 @@ node {
      num_layers: 4
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 128
      input_size_width: 128
      input_size_width: 192
      input_size_height: 192
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
@ -96,7 +108,7 @@ node {
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 896
      num_boxes: 2016
      num_coords: 18
      box_coord_offset: 0
      keypoint_coord_offset: 4
@ -106,10 +118,10 @@ node {
      score_clipping_thresh: 100.0
      reverse_output_order: true

      x_scale: 128.0
      y_scale: 128.0
      h_scale: 128.0
      w_scale: 128.0
      x_scale: 192.0
      y_scale: 192.0
      w_scale: 192.0
      h_scale: 192.0
      min_score_thresh: 0.5
    }
  }
BIN mediapipe/modules/palm_detection/palm_detection_lite.tflite (Executable file)
Binary file not shown.
@ -0,0 +1,63 @@
# MediaPipe graph to load a selected palm detection TF Lite model.

type: "PalmDetectionModelLoader"

# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference
# latency generally go up with the model complexity. If unspecified, functions
# as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# TF Lite model represented as a FlatBuffer.
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
output_side_packet: "MODEL:model"

# Determines the path to the desired palm detection model file.
node {
  calculator: "SwitchContainer"
  input_side_packet: "SELECT:model_complexity"
  output_side_packet: "PACKET:model_path"
  options: {
    [mediapipe.SwitchContainerOptions.ext] {
      select: 1
      contained_node: {
        calculator: "ConstantSidePacketCalculator"
        options: {
          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
            packet {
              string_value: "mediapipe/modules/palm_detection/palm_detection_lite.tflite"
            }
          }
        }
      }
      contained_node: {
        calculator: "ConstantSidePacketCalculator"
        options: {
          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
            packet {
              string_value: "mediapipe/modules/palm_detection/palm_detection_full.tflite"
            }
          }
        }
      }
    }
  }
}

# Loads the file in the specified path into a blob.
node {
  calculator: "LocalFileContentsCalculator"
  input_side_packet: "FILE_PATH:model_path"
  output_side_packet: "CONTENTS:model_blob"
  options: {
    [mediapipe.LocalFileContentsCalculatorOptions.ext]: {
      text_mode: false
    }
  }
}

# Converts the input blob into a TF Lite model.
node {
  calculator: "TfLiteModelCalculator"
  input_side_packet: "MODEL_BLOB:model_blob"
  output_side_packet: "MODEL:model"
}

Binary file not shown.
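The new `PalmDetectionModelLoader` subgraph resolves `MODEL_COMPLEXITY` to a model path with a `SwitchContainer` (defaulting to the full model via `select: 1`), reads that file with `LocalFileContentsCalculator`, and converts the blob with `TfLiteModelCalculator`. For intuition only, a minimal Python sketch of the selection step; the helper name is hypothetical, and the real selection happens inside the graph:

```python
# Hypothetical helper mirroring the SwitchContainer above; not MediaPipe API.
_PALM_DETECTION_MODEL_PATHS = (
    'mediapipe/modules/palm_detection/palm_detection_lite.tflite',  # complexity 0
    'mediapipe/modules/palm_detection/palm_detection_full.tflite',  # complexity 1
)

def select_palm_detection_model(model_complexity: int = 1) -> str:
  """Returns the model path the loader would resolve for a given complexity."""
  if model_complexity not in (0, 1):
    raise ValueError('model_complexity must be 0 (lite) or 1 (full)')
  return _PALM_DETECTION_MODEL_PATHS[model_complexity]
```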
@ -127,6 +127,7 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*

- (void)videoUpdateIfNeeded {
  CMTime timestamp = [_videoItem currentTime];

  if ([_videoOutput hasNewPixelBufferForItemTime:timestamp]) {
    CVPixelBufferRef pixelBuffer =
        [_videoOutput copyPixelBufferForItemTime:timestamp itemTimeForDisplay:nil];
@ -139,6 +140,12 @@ static CVReturn renderCallback(CVDisplayLinkRef displayLink, const CVTimeStamp*
      }
      CFRelease(pixelBuffer);
    });
  } else if (!_videoDisplayLink.paused && _videoPlayer.rate == 0) {
    // The video might be paused by the operating system for other reasons not caught in the
    // context of an interruption. If this happens, @c _videoDisplayLink will not be in a paused
    // state, while the _videoPlayer will have rate 0 (i.e., paused). In this scenario we restart
    // video playback.
    [_videoPlayer play];
  }
}
@ -124,7 +124,10 @@ class Hands(SolutionBase):
            'handlandmarkcpu__ThresholdingCalculator.threshold':
                min_tracking_confidence,
        },
        outputs=['multi_hand_landmarks', 'multi_handedness'])
        outputs=[
            'multi_hand_landmarks', 'multi_hand_world_landmarks',
            'multi_handedness'
        ])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the hand landmarks and handedness of each detected hand.

@ -137,10 +140,14 @@ class Hands(SolutionBase):
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple object with two fields: a "multi_hand_landmarks" field that
      contains the hand landmarks on each detected hand and a "multi_handedness"
      field that contains the handedness (left v.s. right hand) of the detected
      hand.
      A NamedTuple object with the following fields:
        1) a "multi_hand_landmarks" field that contains the hand landmarks on
           each detected hand.
        2) a "multi_hand_world_landmarks" field that contains the hand landmarks
           on each detected hand in real-world 3D coordinates that are in meters
           with the origin at the hand's approximate geometric center.
        3) a "multi_handedness" field that contains the handedness (left v.s.
           right hand) of the detected hand.
    """

    return super().process(input_data={'image': image})
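With `multi_hand_world_landmarks` added to the solution outputs, callers get metric 3D hand landmarks next to the existing image-space landmarks and handedness. A minimal usage sketch; 'hand.jpg' is a placeholder path, not a file shipped with this change:

```python
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands

image = cv2.cvtColor(cv2.imread('hand.jpg'), cv2.COLOR_BGR2RGB)
with mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:
  results = hands.process(image)
  if results.multi_hand_landmarks:
    for handedness, world_landmarks in zip(results.multi_handedness,
                                           results.multi_hand_world_landmarks):
      # World landmarks are in meters, origin at the hand's approximate center.
      wrist = world_landmarks.landmark[mp_hands.HandLandmark.WRIST]
      print(handedness.classification[0].label, wrist.x, wrist.y, wrist.z)
```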
@ -34,20 +34,20 @@ from mediapipe.python.solutions import hands as mp_hands
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
LITE_MODEL_DIFF_THRESHOLD = 25  # pixels
FULL_MODEL_DIFF_THRESHOLD = 20  # pixels
EXPECTED_HAND_COORDINATES_PREDICTION = [[[138, 343], [211, 330], [257, 286],
                                          [289, 237], [322, 203], [219, 216],
                                          [238, 138], [249, 90], [253, 51],
                                          [177, 204], [184, 115], [187, 60],
                                          [185, 19], [138, 208], [131, 127],
                                          [124, 77], [117, 36], [106, 222],
                                          [92, 159], [79, 124], [68, 93]],
                                         [[580, 34], [504, 50], [459, 94],
EXPECTED_HAND_COORDINATES_PREDICTION = [[[580, 34], [504, 50], [459, 94],
                                          [429, 146], [397, 182], [507, 167],
                                          [479, 245], [469, 292], [464, 330],
                                          [545, 180], [534, 265], [533, 319],
                                          [536, 360], [581, 172], [587, 252],
                                          [593, 304], [599, 346], [615, 168],
                                          [628, 223], [638, 258], [648, 288]]]
                                          [628, 223], [638, 258], [648, 288]],
                                         [[138, 343], [211, 330], [257, 286],
                                          [289, 237], [322, 203], [219, 216],
                                          [238, 138], [249, 90], [253, 51],
                                          [177, 204], [184, 115], [187, 60],
                                          [185, 19], [138, 208], [131, 127],
                                          [124, 77], [117, 36], [106, 222],
                                          [92, 159], [79, 124], [68, 93]]]


class HandsTest(parameterized.TestCase):
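The expected coordinates are reordered, apparently because the updated models report the two hands in a different order, and the per-model thresholds above bound how far predictions may drift from these values. A sketch of that kind of comparison; the helper is illustrative and not the test's actual assertion code:

```python
import numpy as np

def max_pixel_error(predicted, expected):
  """Largest Euclidean distance between corresponding 2D keypoints, in pixels."""
  diff = np.asarray(predicted, dtype=np.float32) - np.asarray(expected, dtype=np.float32)
  return float(np.linalg.norm(diff, axis=-1).max())

predicted = [[578, 36], [506, 48], [457, 95]]
expected = [[580, 34], [504, 50], [459, 94]]
assert max_pixel_error(predicted, expected) < 20  # FULL_MODEL_DIFF_THRESHOLD
```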
@ -80,6 +80,7 @@ class Holistic(SolutionBase):
               smooth_landmarks=True,
               enable_segmentation=False,
               smooth_segmentation=True,
               refine_face_landmarks=False,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Holistic object.

@ -98,6 +99,10 @@ class Holistic(SolutionBase):
      smooth_segmentation: Whether to filter segmentation across different input
        images to reduce jitter. See details in
        https://solutions.mediapipe.dev/holistic#smooth_segmentation.
      refine_face_landmarks: Whether to further refine the landmark coordinates
        around the eyes and lips, and output additional landmarks around the
        irises. Defaults to False. See details in
        https://solutions.mediapipe.dev/holistic#refine_face_landmarks.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
        detection to be considered successful. See details in
        https://solutions.mediapipe.dev/holistic#min_detection_confidence.

@ -114,6 +119,7 @@ class Holistic(SolutionBase):
            'enable_segmentation': enable_segmentation,
            'smooth_segmentation':
                smooth_segmentation and not static_image_mode,
            'refine_face_landmarks': refine_face_landmarks,
            'use_prev_landmarks': not static_image_mode,
        },
        calculator_params={
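The new `refine_face_landmarks` flag is threaded from the `Holistic` constructor into the graph's `REFINE_FACE_LANDMARKS` side packet. A minimal usage sketch; 'person.jpg' is a placeholder path. Per the test below, the face mesh carries 478 landmarks with refinement enabled and 468 without:

```python
import cv2
import mediapipe as mp

mp_holistic = mp.solutions.holistic

image = cv2.cvtColor(cv2.imread('person.jpg'), cv2.COLOR_BGR2RGB)
with mp_holistic.Holistic(static_image_mode=True,
                          refine_face_landmarks=True) as holistic:
  results = holistic.process(image)
  if results.face_landmarks:
    print(len(results.face_landmarks.landmark))  # expected: 478
```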
@ -99,18 +99,23 @@ class PoseTest(parameterized.TestCase):
      results = holistic.process(image)
      self.assertIsNone(results.pose_landmarks)

  @parameterized.named_parameters(('static_lite', True, 0, 3),
                                  ('static_full', True, 1, 3),
                                  ('static_heavy', True, 2, 3),
                                  ('video_lite', False, 0, 3),
                                  ('video_full', False, 1, 3),
                                  ('video_heavy', False, 2, 3))
  def test_on_image(self, static_image_mode, model_complexity, num_frames):
  @parameterized.named_parameters(('static_lite', True, 0, False, 3),
                                  ('static_full', True, 1, False, 3),
                                  ('static_heavy', True, 2, False, 3),
                                  ('video_lite', False, 0, False, 3),
                                  ('video_full', False, 1, False, 3),
                                  ('video_heavy', False, 2, False, 3),
                                  ('static_full_refine_face', True, 1, True, 3),
                                  ('video_full_refine_face', False, 1, True, 3))
  def test_on_image(self, static_image_mode, model_complexity,
                    refine_face_landmarks, num_frames):
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/holistic.jpg')
    image = cv2.imread(image_path)
    with mp_holistic.Holistic(static_image_mode=static_image_mode,
                              model_complexity=model_complexity) as holistic:
    with mp_holistic.Holistic(
        static_image_mode=static_image_mode,
        model_complexity=model_complexity,
        refine_face_landmarks=refine_face_landmarks) as holistic:
      for idx in range(num_frames):
        results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)

@ -129,7 +134,8 @@ class PoseTest(parameterized.TestCase):
                                EXPECTED_RIGHT_HAND_LANDMARKS,
                                HAND_DIFF_THRESHOLD)
    # TODO: Verify the correctness of the face landmarks.
    self.assertLen(results.face_landmarks.landmark, 468)
    self.assertLen(results.face_landmarks.landmark,
                   478 if refine_face_landmarks else 468)


if __name__ == '__main__':