187 lines
6.2 KiB
Plaintext
187 lines
6.2 KiB
Plaintext
|
# ObjectronGpuSubgraph: MediaPipe Objectron running on GPU. Produces 3D
# bounding boxes for objects.
type: "ObjectronGpuSubgraph"

# ---- Inputs ----------------------------------------------------------------

# GPU input image. The image is assumed to have a 3:4 (width:height) aspect
# ratio.
input_stream: "IMAGE_GPU:image"

# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera.
input_side_packet: "LABELS_CSV:allowed_labels"

# (int) Maximum number of objects to detect/track.
input_side_packet: "MAX_NUM_OBJECTS:max_num_objects"

# (bool) Whether landmarks found on the previous image should be used to help
# localize landmarks on the current image.
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# ---- Outputs ---------------------------------------------------------------

# Detected 3D objects, collected in a FrameAnnotation.
output_stream: "FRAME_ANNOTATION:detected_objects"
|
||
|
|
||
|
# When the optional input side packet "use_prev_landmarks" is either absent or
|
||
|
# set to true, uses the landmarks on the previous image to help localize
|
||
|
# landmarks on the current image.
|
||
|
node {
  calculator: "GateCalculator"
  # Rects fed back from the previous image go in; the gated copy comes out.
  input_stream: "prev_box_rects_from_landmarks"
  output_stream: "gated_prev_box_rects_from_landmarks"
  # The gate is controlled by the "use_prev_landmarks" side packet.
  input_side_packet: "ALLOW:use_prev_landmarks"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      # NOTE(review): presumably the value applied when the ALLOW side packet
      # is absent -- confirm against GateCalculatorOptions.
      allow: true
    }
  }
}
|
||
|
|
||
|
# Determines if an input vector of NormalizedRect has a size greater than or
|
||
|
# equal to the provided max_num_objects.
|
||
|
node {
  calculator: "NormalizedRectVectorHasMinSizeCalculator"
  # Collection whose size is checked: the gated rects from the previous image.
  input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks"
  # Consumed by the image GateCalculator as its DISALLOW signal.
  output_stream: "prev_has_enough_objects"
  # Size that the collection is compared against.
  input_side_packet: "max_num_objects"
}
|
||
|
|
||
|
# Drops the incoming image if the previous image already produced at least
# max_num_objects box rects (prev_has_enough_objects). Otherwise, passes the
# incoming image through to trigger a new round of box detection in
# ObjectDetectionOidV4Subgraph.
|
||
|
node {
  calculator: "GateCalculator"
  # Data stream being gated, followed by the control stream. The order of the
  # input_stream entries is kept as-is.
  input_stream: "image"
  input_stream: "DISALLOW:prev_has_enough_objects"
  # Images that pass the gate are forwarded to object detection.
  output_stream: "detection_image"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      # NOTE(review): per the option name, an empty DISALLOW packet is treated
      # as "allow" -- confirm against GateCalculatorOptions.
      empty_packets_as_allow: true
    }
  }
}
|
||
|
|
||
|
# Subgraph that performs 2D object detection.
|
||
|
node {
  calculator: "ObjectDetectionOidV4Subgraph"
  # Only images that passed the image gate reach the detector.
  input_stream: "IMAGE_GPU:detection_image"
  output_stream: "DETECTIONS:raw_detections"
  # Forwards the allowed_labels side packet to the detection subgraph.
  input_side_packet: "LABELS_CSV:allowed_labels"
}
|
||
|
|
||
|
# Makes sure there are no more detections than provided max_num_objects.
|
||
|
node {
  calculator: "ClipDetectionVectorSizeCalculator"
  # Unclipped detections coming from the detection subgraph.
  input_stream: "raw_detections"
  # Upper bound on the number of detections.
  input_side_packet: "max_num_objects"
  # Clipped detection vector.
  output_stream: "detections"
}
|
||
|
|
||
|
# Extracts image size from the input images.
|
||
|
node {
  calculator: "ImagePropertiesCalculator"
  # Reads from the "image" input stream directly, not from the gated
  # "detection_image" stream.
  input_stream: "IMAGE_GPU:image"
  # Image size, consumed by DetectionsToRectsCalculator.
  output_stream: "SIZE:image_size"
}
|
||
|
|
||
|
# Converts results of box detection into rectangles (normalized by image size)
|
||
|
# that enclose the box.
|
||
|
node {
  calculator: "DetectionsToRectsCalculator"
  # Clipped detections plus the image size.
  input_stream: "DETECTIONS:detections"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECTS:box_rects_from_detections"
  options {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      # NOTE(review): per the option name, no zero rect is emitted when the
      # detection list is empty -- confirm against the calculator.
      output_zero_rect_for_empty_detections: false
    }
  }
}
|
||
|
|
||
|
# Performs association between NormalizedRect vector elements from previous
|
||
|
# image and rects based on object detections from the current image. This
|
||
|
# calculator ensures that the output box_rects vector doesn't contain
|
||
|
# overlapping regions based on the specified min_similarity_threshold.
|
||
|
node {
  calculator: "AssociationNormRectCalculator"
  # Two rect sources are associated: rects from detections on the current
  # image, then gated rects from the previous image. The order of the
  # input_stream entries is kept as-is.
  input_stream: "box_rects_from_detections"
  input_stream: "gated_prev_box_rects_from_landmarks"
  output_stream: "box_rects"
  options {
    [mediapipe.AssociationCalculatorOptions.ext] {
      # Similarity threshold used to decide whether regions overlap.
      min_similarity_threshold: 0.2
    }
  }
}
|
||
|
|
||
|
# Outputs each element of box_rects at a fake timestamp for the rest of the
|
||
|
# graph to process. Clones the image packet for each
|
||
|
# single_box_rect at the fake timestamp. At the end of the loop, outputs the
|
||
|
# BATCH_END timestamp for downstream calculators to inform them that all
|
||
|
# elements in the vector have been processed.
|
||
|
node {
  calculator: "BeginLoopNormalizedRectCalculator"
  # Vector to iterate over, and the stream carrying its individual elements.
  input_stream: "ITERABLE:box_rects"
  output_stream: "ITEM:single_box_rect"
  # The input image is cloned for each emitted rect.
  input_stream: "CLONE:image"
  output_stream: "CLONE:landmarks_image"
  # End-of-batch signal for the matching EndLoop calculator.
  output_stream: "BATCH_END:box_rects_timestamp"
}
|
||
|
|
||
|
# Subgraph that localizes box landmarks.
|
||
|
node {
  calculator: "BoxLandmarkSubgraph"
  # Inputs are the per-rect outputs of BeginLoopNormalizedRectCalculator: the
  # cloned image and the single rect.
  input_stream: "IMAGE:landmarks_image"
  input_stream: "NORM_RECT:single_box_rect"
  # Landmarks for that one box.
  output_stream: "NORM_LANDMARKS:single_box_landmarks"
}
|
||
|
|
||
|
# Collects a set of landmarks for each box into a vector. Upon receiving the
|
||
|
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||
|
# timestamp.
|
||
|
node {
  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
  # Per-box landmarks produced inside the loop.
  input_stream: "ITEM:single_box_landmarks"
  # End-of-batch signal emitted by BeginLoopNormalizedRectCalculator.
  input_stream: "BATCH_END:box_rects_timestamp"
  # Vector of the collected per-box landmarks.
  output_stream: "ITERABLE:multi_box_landmarks"
}
|
||
|
|
||
|
# Convert box landmarks to frame annotations.
|
||
|
node {
  calculator: "LandmarksToFrameAnnotationCalculator"
  # Vector of per-box landmarks gathered by the EndLoop calculator.
  input_stream: "MULTI_LANDMARKS:multi_box_landmarks"
  # Annotations consumed by Lift2DFrameAnnotationTo3DCalculator.
  output_stream: "FRAME_ANNOTATION:box_annotations"
}
|
||
|
|
||
|
# Lift the 2D landmarks to 3D using EPnP algorithm.
|
||
|
node {
  calculator: "Lift2DFrameAnnotationTo3DCalculator"
  input_stream: "FRAME_ANNOTATION:box_annotations"
  # "detected_objects" is also the subgraph's FRAME_ANNOTATION output stream.
  output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects"
  options {
    [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] {
      # NOTE(review): these focal lengths presumably correspond to the assumed
      # 3:4 (width:height) input aspect ratio -- confirm before changing.
      normalized_focal_x: 2.0975
      normalized_focal_y: 1.5731
    }
  }
}
|
||
|
|
||
|
# Get rotated rectangle from detected box.
|
||
|
node {
  calculator: "FrameAnnotationToRectCalculator"
  # Lifted annotations produced by Lift2DFrameAnnotationTo3DCalculator.
  input_stream: "FRAME_ANNOTATION:detected_objects"
  # Fed back into the graph through PreviousLoopbackCalculator.
  output_stream: "NORM_RECTS:box_rects_from_landmarks"
}
|
||
|
|
||
|
# Caches a box rectangle fed back from BoxLandmarkSubgraph, and upon the
|
||
|
# arrival of the next input image sends out the cached rectangle with the
|
||
|
# timestamp replaced by that of the input image, essentially generating a packet
|
||
|
# that carries the previous box rectangle. Note that upon the arrival of the
|
||
|
# very first input image, an empty packet is sent out to jump start the
|
||
|
# feedback loop.
|
||
|
node {
  calculator: "PreviousLoopbackCalculator"
  # MAIN is the input image stream; LOOP carries the rects fed back from
  # FrameAnnotationToRectCalculator.
  input_stream: "MAIN:image"
  input_stream: "LOOP:box_rects_from_landmarks"
  # Rects from the previous image, consumed by the first GateCalculator.
  output_stream: "PREV_LOOP:prev_box_rects_from_landmarks"
  # LOOP is declared as a back edge of the graph.
  input_stream_info {
    tag_index: "LOOP"
    back_edge: true
  }
}
|