# MediaPipe graph to detect poses. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "pose_detection.tflite" is available at
# "mediapipe/modules/pose_detection/pose_detection.tflite"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "PoseDetectionGpu"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:pose_detections"
#   }

# Name under which this subgraph is registered and referenced by other graphs.
type: "PoseDetectionGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"

# Detected poses. (std::vector<Detection>)
# The bounding box in each pose detection is currently set to the bounding box
# of the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the second two for the upper
# body only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for a
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Transforms the input image into a 224x224 tensor while keeping the aspect
# ratio (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image. The padding that was applied is
# emitted on LETTERBOX_PADDING so that detections can be mapped back to the
# original image later in the graph.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      # Tensor size; must stay in sync with input_size_* of the
      # SsdAnchorsCalculator and the *_scale values of the
      # TensorsToDetectionsCalculator in this graph (all 224).
      output_tensor_width: 224
      output_tensor_height: 224
      # Preserve the aspect ratio; this is what introduces the letterbox.
      keep_aspect_ratio: true
      # Pixel values are mapped into the [-1.0, 1.0] float range.
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      gpu_origin: TOP_LEFT
    }
  }
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      # Must be resolvable at this path during execution.
      model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"
      # Select the GPU delegate with the advanced GPU API enabled.
      delegate: { gpu { use_advanced_gpu_api: true } }
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      # Same 224x224 size as the tensor produced by ImageToTensorCalculator.
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      # One stride entry per layer (num_layers: 5).
      # NOTE(review): the number of anchors generated from these settings has
      # to match num_boxes (2254) in the TensorsToDetectionsCalculator options;
      # re-check that value whenever strides or input size change.
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 2254
      # 12 values per box: box coordinates start at offset 0, followed from
      # offset 4 by 4 key points with 2 values each (4 + 4 * 2 = 12).
      num_coords: 12
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 4
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      # Scales equal the 224x224 model input size used elsewhere in this graph.
      x_scale: 224.0
      y_scale: 224.0
      h_scale: 224.0
      w_scale: 224.0
      # Detections scoring below this threshold are not emitted.
      min_score_thresh: 0.5
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      # Overlap is measured as intersection-over-union against this threshold.
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
}

# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (the result of the aspect-ratio-preserving conversion done
# by ImageToTensorCalculator) to the corresponding locations on the same image
# with the letterbox removed (the input image to the graph before image
# transformation). The result is the graph's DETECTIONS output stream.
node {
  calculator: "DetectionLetterboxRemovalCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "DETECTIONS:detections"
}