Internal change

PiperOrigin-RevId: 523751152
This commit is contained in:
MediaPipe Team 2023-04-12 11:08:48 -07:00 committed by Copybara-Service
parent f9a2d0995d
commit 049ba8bbca
7 changed files with 253 additions and 246 deletions

View File

@ -29,6 +29,8 @@ cc_library(
"//mediapipe/calculators/tensor:tensors_to_detections_calculator_cc_proto",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator_cc_proto",
"//mediapipe/calculators/util:alignment_points_to_rects_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/calculators/util:detection_projection_calculator",
"//mediapipe/calculators/util:detection_transformation_calculator",
"//mediapipe/calculators/util:detections_to_rects_calculator",

View File

@ -131,7 +131,7 @@ void ConfigureNonMaxSuppressionCalculator(
void ConfigureDetectionsToRectsCalculator(
mediapipe::DetectionsToRectsCalculatorOptions* options) {
// Populates the DetectionsToRectsCalculatorOptions pointed to by `options`
// in place; nothing is returned.
options->set_rotation_vector_start_keypoint_index(0);
// NOTE(review): the end keypoint index is assigned twice in this listing (2,
// then 1). If both statements are really present, presumably only the later
// value (1) remains. This looks like the before/after lines of a change shown
// together -- confirm which single call should stay.
options->set_rotation_vector_end_keypoint_index(2);
options->set_rotation_vector_end_keypoint_index(1);
// NOTE(review): ConfigureAlignmentPointsRectsCalculator passes the same value
// through set_rotation_vector_target_angle_degrees(). Confirm that this
// setter, which has no "_degrees" suffix, is meant to receive 90 in the same
// unit.
options->set_rotation_vector_target_angle(90);
options->set_output_zero_rect_for_empty_detections(true);
}
@ -140,12 +140,20 @@ void ConfigureDetectionsToRectsCalculator(
// detector with model metadata.
void ConfigureRectTransformationCalculator(
mediapipe::RectTransformationCalculatorOptions* options) {
// Populates the RectTransformationCalculatorOptions pointed to by `options`
// in place; nothing is returned.
// NOTE(review): scale_x and scale_y are each assigned twice in this listing
// (2.6, then 1.25), with a shift_y assignment in between. If all statements
// are really present, presumably the later 1.25 values remain and shift_y
// stays at -0.5. This looks like the before/after lines of a change shown
// together -- confirm which calls should stay.
options->set_scale_x(2.6);
options->set_scale_y(2.6);
options->set_shift_y(-0.5);
options->set_scale_x(1.25);
options->set_scale_y(1.25);
options->set_square_long(true);
}
// Fills `options` with the rotation-vector settings for the alignment-points
// rect node: start keypoint index 0, end keypoint index 1, and a target angle
// of 90 degrees. `options` is written through directly and must be non-null.
void ConfigureAlignmentPointsRectsCalculator(
    mediapipe::DetectionsToRectsCalculatorOptions* options) {
  // Values derived from
  // mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt
  auto& rect_options = *options;
  rect_options.set_rotation_vector_target_angle_degrees(90);
  rect_options.set_rotation_vector_start_keypoint_index(0);
  rect_options.set_rotation_vector_end_keypoint_index(1);
}
} // namespace
// A "mediapipe.tasks.vision.pose_detector.PoseDetectorGraph" performs pose
@ -246,8 +254,8 @@ class PoseDetectorGraph : public core::ModelTaskGraph {
image_in >> preprocessing.In(kImageTag);
norm_rect_in >> preprocessing.In(kNormRectTag);
auto preprocessed_tensors = preprocessing.Out(kTensorsTag);
auto matrix = preprocessing.Out(kMatrixTag);
auto image_size = preprocessing.Out(kImageSizeTag);
auto letterbox_padding = preprocessing.Out("LETTERBOX_PADDING");
// Pose detection model inference.
auto& inference = AddInference(
@ -281,14 +289,38 @@ class PoseDetectorGraph : public core::ModelTaskGraph {
&non_maximum_suppression
.GetOptions<mediapipe::NonMaxSuppressionCalculatorOptions>());
detections >> non_maximum_suppression.In("");
auto nms_detections = non_maximum_suppression.Out("");
auto filtered_detections = non_maximum_suppression.Out("");
// Projects detections back into the input image coordinates system.
auto& detection_projection = graph.AddNode("DetectionProjectionCalculator");
nms_detections >> detection_projection.In(kDetectionsTag);
matrix >> detection_projection.In(kProjectionMatrixTag);
Source<std::vector<Detection>> pose_detections =
detection_projection.Out(kDetectionsTag).Cast<std::vector<Detection>>();
// Adjust detections on the letterboxed image.
auto& detection_letterbox_removal =
graph.AddNode("DetectionLetterboxRemovalCalculator");
filtered_detections >> detection_letterbox_removal.In("DETECTIONS");
letterbox_padding >> detection_letterbox_removal.In("LETTERBOX_PADDING");
Source<std::vector<Detection>> adjusted_detections =
detection_letterbox_removal.Out("DETECTIONS")
.Cast<std::vector<Detection>>();
// Converts pose detection into a rectangle based on center and scale
// alignment points.
auto& detection_to_rects = graph.AddNode("AlignmentPointsRectsCalculator");
ConfigureAlignmentPointsRectsCalculator(
&detection_to_rects
.GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>());
image_size >> detection_to_rects.In(kImageSizeTag);
adjusted_detections >> detection_to_rects.In("DETECTIONS");
auto pose_rects = detection_to_rects.Out("NORM_RECTS")
.Cast<std::vector<NormalizedRect>>();
// Expands pose rect with margin used during training.
auto& pose_rect_transformation =
graph.AddNode("RectTransformationCalculator");
ConfigureRectTransformationCalculator(
&pose_rect_transformation
.GetOptions<mediapipe::RectTransformationCalculatorOptions>());
image_size >> pose_rect_transformation.In(kImageSizeTag);
pose_rects >> pose_rect_transformation.In("NORM_RECTS");
auto expanded_pose_rects =
pose_rect_transformation[Output<std::vector<NormalizedRect>>("")];
if (subgraph_options.has_num_poses()) {
// Clip pose detections to maximum number of poses.
@ -297,48 +329,13 @@ class PoseDetectorGraph : public core::ModelTaskGraph {
clip_detection_vector_size
.GetOptions<mediapipe::ClipVectorSizeCalculatorOptions>()
.set_max_vec_size(subgraph_options.num_poses());
pose_detections >> clip_detection_vector_size.In("");
pose_detections =
adjusted_detections >> clip_detection_vector_size.In("");
adjusted_detections =
clip_detection_vector_size.Out("").Cast<std::vector<Detection>>();
}
// Converts results of pose detection into a rectangle (normalized by image
// size) that encloses the face and is rotated such that the line connecting
// left eye and right eye is aligned with the X-axis of the rectangle.
auto& detections_to_rects = graph.AddNode("DetectionsToRectsCalculator");
ConfigureDetectionsToRectsCalculator(
&detections_to_rects
.GetOptions<mediapipe::DetectionsToRectsCalculatorOptions>());
image_size >> detections_to_rects.In(kImageSizeTag);
pose_detections >> detections_to_rects.In(kDetectionsTag);
auto pose_rects = detections_to_rects.Out(kNormRectsTag)
.Cast<std::vector<NormalizedRect>>();
// Expands and shifts the rectangle that contains the pose so that it's
// likely to cover the entire pose.
auto& rect_transformation = graph.AddNode("RectTransformationCalculator");
ConfigureRectTransformationCalculator(
&rect_transformation
.GetOptions<mediapipe::RectTransformationCalculatorOptions>());
pose_rects >> rect_transformation.In(kNormRectsTag);
image_size >> rect_transformation.In(kImageSizeTag);
auto expanded_pose_rects =
rect_transformation.Out("").Cast<std::vector<NormalizedRect>>();
// Calculator to convert relative detection bounding boxes to pixel
// detection bounding boxes.
auto& detection_transformation =
graph.AddNode("DetectionTransformationCalculator");
detection_projection.Out(kDetectionsTag) >>
detection_transformation.In(kDetectionsTag);
preprocessing.Out(kImageSizeTag) >>
detection_transformation.In(kImageSizeTag);
auto pose_pixel_detections =
detection_transformation.Out(kPixelDetectionsTag)
.Cast<std::vector<Detection>>();
return PoseDetectionOuts{
/* pose_detections= */ pose_pixel_detections,
/* pose_detections= */ adjusted_detections,
/* pose_rects= */ pose_rects,
/* expanded_pose_rects= */ expanded_pose_rects,
/* image= */ preprocessing.Out(kImageTag).Cast<Image>()};

View File

@ -328,8 +328,8 @@ INSTANTIATE_TEST_SUITE_P(
.test_name = "PoseLandmarkerLiteModel",
.input_model_name = kPoseLandmarkerLiteModel,
.test_image_name = kPoseImage,
.pose_rect = MakePoseRect(0.5450622, 0.31605977, 0.5196669,
0.77911085, 0.50149304),
.pose_rect = MakePoseRect(0.49192297, 0.7013345, 0.6317167,
0.9471016, -0.029253244),
.expected_presence = true,
.expected_landmarks =
GetExpectedLandmarkList(kExpectedPoseLandmarksFilename),
@ -338,8 +338,8 @@ INSTANTIATE_TEST_SUITE_P(
.test_name = "PoseLandmarkerLiteModelNoPose",
.input_model_name = kPoseLandmarkerLiteModel,
.test_image_name = kBurgerImage,
.pose_rect = MakePoseRect(0.5450622, 0.31605977, 0.5196669,
0.77911085, 0.50149304),
.pose_rect = MakePoseRect(0.49192297, 0.7013345, 0.6317167,
0.9471016, -0.029253244),
.expected_presence = false,
.expected_landmarks = std::nullopt,
.landmarks_diff_threshold = kLiteModelFractionDiff}),
@ -353,8 +353,8 @@ INSTANTIATE_TEST_SUITE_P(
.test_name = "MultiPoseLandmarkerLiteModel",
.input_model_name = kPoseLandmarkerLiteModel,
.test_image_name = kPoseImage,
.pose_rects = {MakePoseRect(0.5450622, 0.31605977, 0.5196669,
0.77911085, 0.50149304)},
.pose_rects = {MakePoseRect(0.49192297, 0.7013345, 0.6317167, 0.9471016,
-0.029253244)},
.expected_presences = {true},
.expected_landmark_lists = {GetExpectedLandmarkList(
kExpectedPoseLandmarksFilename)},

View File

@ -1,231 +1,231 @@
landmark {
x: 0.44039154
y: 0.69266146
z: -1.0701033
visibility: 0.99999785
presence: 0.99999964
x: 0.47503802
y: 0.20596696
z: -0.2717698
visibility: 0.9999881
presence: 0.999985
}
landmark {
x: 0.4402231
y: 0.6729447
z: -1.0995388
visibility: 0.9999958
presence: 0.9999989
x: 0.48141137
y: 0.18920818
z: -0.2632932
visibility: 0.9999653
presence: 0.9999397
}
landmark {
x: 0.44525078
y: 0.67028874
z: -1.0994295
visibility: 0.9999949
presence: 0.9999989
x: 0.48735094
y: 0.18857746
z: -0.26326385
visibility: 0.9999536
presence: 0.9999366
}
landmark {
x: 0.44974685
y: 0.6671066
z: -1.099441
visibility: 0.9999943
presence: 0.9999987
x: 0.49283814
y: 0.187905
z: -0.26327085
visibility: 0.9999398
presence: 0.9999262
}
landmark {
x: 0.43495473
y: 0.67640877
z: -1.0970817
visibility: 0.9999975
presence: 0.99999917
x: 0.47124237
y: 0.1904801
z: -0.24174295
visibility: 0.9999498
presence: 0.99994826
}
landmark {
x: 0.43337595
y: 0.6775264
z: -1.0973414
visibility: 0.99999714
presence: 0.99999917
x: 0.46842983
y: 0.1906853
z: -0.24183725
visibility: 0.9999305
presence: 0.99995315
}
landmark {
x: 0.43298554
y: 0.6775605
z: -1.0972598
visibility: 0.99999726
presence: 0.99999905
x: 0.46623105
y: 0.19083896
z: -0.24178317
visibility: 0.9999275
presence: 0.99995124
}
landmark {
x: 0.4708667
y: 0.6595806
z: -1.0533934
visibility: 0.9999908
presence: 0.99999905
x: 0.51211923
y: 0.19381559
z: -0.17863783
visibility: 0.99989283
presence: 0.99994254
}
landmark {
x: 0.44428575
y: 0.67323744
z: -1.0379978
visibility: 0.99999785
presence: 0.9999993
x: 0.47326156
y: 0.19597495
z: -0.07647368
visibility: 0.9999068
presence: 0.9999703
}
landmark {
x: 0.4564836
y: 0.6977895
z: -1.0333056
visibility: 0.9999943
presence: 0.9999994
x: 0.48888734
y: 0.21832445
z: -0.24005938
visibility: 0.9999583
presence: 0.99999404
}
landmark {
x: 0.44782764
y: 0.7037028
z: -1.0296792
visibility: 0.9999962
presence: 0.9999994
x: 0.4750799
y: 0.21996267
z: -0.21063438
visibility: 0.9999558
presence: 0.9999944
}
landmark {
x: 0.5449939
y: 0.62528574
z: -0.7878126
visibility: 0.9999747
presence: 0.9999956
x: 0.58094275
y: 0.27069643
z: -0.13522492
visibility: 0.9999832
presence: 0.999948
}
landmark {
x: 0.3974144
y: 0.68981373
z: -0.77783424
visibility: 0.99997735
presence: 0.99999726
x: 0.44552392
y: 0.27577883
z: 0.024918541
visibility: 0.9999881
presence: 0.9999864
}
landmark {
x: 0.69353175
y: 0.63911355
z: -0.69779164
visibility: 0.99741924
presence: 0.99996626
x: 0.7083446
y: 0.2769637
z: -0.21408014
visibility: 0.99515605
presence: 0.9995721
}
landmark {
x: 0.32092315
y: 0.8199662
z: -0.7256159
visibility: 0.99759066
presence: 0.9999684
x: 0.31485358
y: 0.25585473
z: 0.03827352
visibility: 0.98532397
presence: 0.99995303
}
landmark {
x: 0.83430344
y: 0.5488517
z: -0.7037824
visibility: 0.9987625
presence: 0.99989784
x: 0.83101267
y: 0.26628205
z: -0.3408214
visibility: 0.98725593
presence: 0.99614346
}
landmark {
x: 0.20488566
y: 0.8801585
z: -0.76772463
visibility: 0.99855787
presence: 0.99988043
x: 0.20504552
y: 0.24419393
z: -0.12712422
visibility: 0.9823162
presence: 0.9993932
}
landmark {
x: 0.87126845
y: 0.54215115
z: -0.7420273
visibility: 0.99767953
presence: 0.99979395
x: 0.8703914
y: 0.2622718
z: -0.36847484
visibility: 0.9663167
presence: 0.99103457
}
landmark {
x: 0.21015728
y: 0.8867224
z: -0.8027822
visibility: 0.99664575
presence: 0.9997483
x: 0.16960809
y: 0.2398484
z: -0.14768666
visibility: 0.95874923
presence: 0.99777645
}
landmark {
x: 0.8741963
y: 0.5460341
z: -0.7887856
visibility: 0.9976641
presence: 0.9997937
x: 0.8661166
y: 0.2626395
z: -0.4128364
visibility: 0.96738607
presence: 0.9918098
}
landmark {
x: 0.22013207
y: 0.88651013
z: -0.85653603
visibility: 0.9964618
presence: 0.9997515
x: 0.16942637
y: 0.23765557
z: -0.19633037
visibility: 0.9609766
presence: 0.9979639
}
landmark {
x: 0.8552971
y: 0.5635247
z: -0.7320286
visibility: 0.9979176
presence: 0.9998549
x: 0.851011
y: 0.2654708
z: -0.36451888
visibility: 0.96485573
presence: 0.9941413
}
landmark {
x: 0.23674019
y: 0.87909704
z: -0.7946802
visibility: 0.9968817
presence: 0.9998123
x: 0.18291923
y: 0.24094415
z: -0.15638204
visibility: 0.96108276
presence: 0.9986619
}
landmark {
x: 0.5296566
y: 0.583189
z: -0.0068905717
visibility: 0.99999726
presence: 0.99999833
x: 0.5391705
y: 0.50526816
z: -0.053723037
visibility: 0.9994941
presence: 0.9995571
}
landmark {
x: 0.45126596
y: 0.610716
z: 0.0076607587
visibility: 0.9999982
presence: 0.9999993
x: 0.4622758
y: 0.4999145
z: 0.053916093
visibility: 0.9996294
presence: 0.9997342
}
landmark {
x: 0.5673191
y: 0.67022914
z: -0.012459015
visibility: 0.87660104
presence: 0.99997973
x: 0.675301
y: 0.62948114
z: -0.17877069
visibility: 0.9939932
presence: 0.9988996
}
landmark {
x: 0.40346304
y: 0.68889683
z: 0.048518207
visibility: 0.79370135
presence: 0.9999901
x: 0.28809914
y: 0.52877027
z: -0.1407601
visibility: 0.99657404
presence: 0.9995253
}
landmark {
x: 0.5971223
y: 0.7035845
z: 0.29914334
visibility: 0.9536318
presence: 0.9999622
x: 0.82030344
y: 0.7374987
z: 0.007227801
visibility: 0.98853314
presence: 0.990724
}
landmark {
x: 0.3654526
y: 0.7461876
z: 0.31222725
visibility: 0.9724159
presence: 0.99995387
x: 0.2672157
y: 0.7118606
z: -0.03558438
visibility: 0.99250716
presence: 0.9981616
}
landmark {
x: 0.6009192
y: 0.711494
z: 0.32301757
visibility: 0.8800503
presence: 0.99994063
x: 0.83312243
y: 0.7519515
z: 0.018887112
visibility: 0.9363986
presence: 0.98826605
}
landmark {
x: 0.3758035
y: 0.7603447
z: 0.3248874
visibility: 0.90657604
presence: 0.99992514
x: 0.28136373
y: 0.7367118
z: -0.032466136
visibility: 0.96477795
presence: 0.9976307
}
landmark {
x: 0.60657954
y: 0.71376836
z: 0.16594526
visibility: 0.94293
presence: 0.9998517
x: 0.8624686
y: 0.7670486
z: -0.12530705
visibility: 0.9578498
presence: 0.9776664
}
landmark {
x: 0.32244906
y: 0.75465155
z: 0.12916707
visibility: 0.95600617
presence: 0.9998241
x: 0.20045075
y: 0.7439542
z: -0.17505309
visibility: 0.97366387
presence: 0.99209917
}

View File

@ -1,27 +1,29 @@
# proto-file: mediapipe/framework/formats/detection.proto
# proto-message: Detection
label_id: 0
score: 0.9843089
location_data {
format: BOUNDING_BOX
bounding_box {
xmin: 397
ymin: 198
width: 199
height: 199
format: RELATIVE_BOUNDING_BOX
relative_bounding_box {
xmin: 0.3970945
ymin: 0.29761493
width: 0.1998719
height: 0.29958934
}
relative_keypoints {
x: 0.4879558
x: 0.49192297
y: 0.7013345
}
relative_keypoints {
x: 0.48453212
y: 0.32265592
y: 0.32265595
}
relative_keypoints {
x: 0.4992165
y: 0.4854874
x: 0.49723503
y: 0.48548737
}
relative_keypoints {
x: 0.50227845
y: 0.159788
x: 0.4922851
y: 0.15978798
}
}

View File

@ -1,7 +1,7 @@
# proto-file: mediapipe/framework/formats/rect.proto
# proto-message: NormalizedRect
x_center: 0.5450622
y_center: 0.31605977
width: 0.5196669
height: 0.77911085
rotation: 0.50149304
x_center: 0.49192297
y_center: 0.7013345
height: 0.9471016
width: 0.6317167
rotation: -0.029253244

View File

@ -306,8 +306,8 @@ def external_files():
http_file(
name = "com_google_mediapipe_expected_pose_landmarks_prototxt",
sha256 = "0bb27e9d9729c4171419abf7edd746b4234cb91198d663f3a4363248a49dad1a",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_pose_landmarks.prototxt?generation=1680543279295598"],
sha256 = "c230e0933e6cb4af69ec21314f3f9930fe13e7bb4bf1dbdb74427e4138c24c1e",
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_pose_landmarks.prototxt?generation=1681240674007127"],
)
http_file(
@ -814,6 +814,12 @@ def external_files():
urls = ["https://storage.googleapis.com/mediapipe-assets/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite?generation=1661875879063676"],
)
http_file(
name = "com_google_mediapipe_ocr_text_jpg",
sha256 = "88052e93aa910330433741f5cef140f8f9ec463230a332aef7038b5457b06482",
urls = ["https://storage.googleapis.com/mediapipe-assets/ocr_text.jpg?generation=1681240679268678"],
)
http_file(
name = "com_google_mediapipe_palm_detection_full_tflite",
sha256 = "1b14e9422c6ad006cde6581a46c8b90dd573c07ab7f3934b5589e7cea3f89a54",
@ -930,14 +936,14 @@ def external_files():
http_file(
name = "com_google_mediapipe_pose_expected_detection_pbtxt",
sha256 = "e0d40e98dd5320a780a642c336d0c8720243ac5bcc0e39c4061ad970a503ae24",
urls = ["https://storage.googleapis.com/mediapipe-assets/pose_expected_detection.pbtxt?generation=1678737492211540"],
sha256 = "16866c8dd4fbee60f6972630d73baed219b45824c055c7fbc7dc9a91c4b182cc",
urls = ["https://storage.googleapis.com/mediapipe-assets/pose_expected_detection.pbtxt?generation=1681240681879992"],
)
http_file(
name = "com_google_mediapipe_pose_expected_expanded_rect_pbtxt",
sha256 = "babb2a2d50077f6fa9ee15e30d81abb6e98a920e35acad7542bb3d27b5ce7ffd",
urls = ["https://storage.googleapis.com/mediapipe-assets/pose_expected_expanded_rect.pbtxt?generation=1680543294008098"],
sha256 = "b0a41d25ed115757606dfc034e9d320a93a52616d92d745150b6a886ddc5a88a",
urls = ["https://storage.googleapis.com/mediapipe-assets/pose_expected_expanded_rect.pbtxt?generation=1681240684183698"],
)
http_file(
@ -948,8 +954,8 @@ def external_files():
http_file(
name = "com_google_mediapipe_pose_landmarker_task",
sha256 = "ca4137626f0dc04f87893ccf2ad01949a3b1d4b55fa85ba957dde44a29dd956e",
urls = ["https://storage.googleapis.com/mediapipe-assets/pose_landmarker.task?generation=1680543298177615"],
sha256 = "c20284c073a891774f894269a14da4cbe4a84cab034757dab587bc19c9522b7a",
urls = ["https://storage.googleapis.com/mediapipe-assets/pose_landmarker.task?generation=1681240686676992"],
)
http_file(