mediapipe-rs/mediapipe/modules/objectron/objectron_gpu.pbtxt

# MediaPipe Objectron on GPU that produces 3D bounding boxes for objects.
type: "ObjectronGpuSubgraph"

# Input/Output streams and input side packets.
# Input image, referred to as "image" by the nodes of this graph.
# Note that the input image is assumed to have aspect ratio 3:4 (width:height).
input_stream: "IMAGE_GPU:image"
# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera
# Forwarded as-is to ObjectDetectionOidV4Subgraph.
input_side_packet: "LABELS_CSV:allowed_labels"
# Max number of objects to detect/track. (int)
# Consumed by NormalizedRectVectorHasMinSizeCalculator and
# ClipDetectionVectorSizeCalculator.
input_side_packet: "MAX_NUM_OBJECTS:max_num_objects"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
# Optional: the first GateCalculator node treats an absent packet as true.
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Collection of detected 3D objects, represented as a FrameAnnotation.
# Produced by Lift2DFrameAnnotationTo3DCalculator.
output_stream: "FRAME_ANNOTATION:detected_objects"

# Gates the rects carried over from the previous image. They are forwarded as
# "gated_prev_box_rects_from_landmarks" when the optional input side packet
# "use_prev_landmarks" is either absent or set to true, so that landmarks on
# the previous image can help localize landmarks on the current image.
node {
  calculator: "GateCalculator"
  input_stream: "prev_box_rects_from_landmarks"
  input_side_packet: "ALLOW:use_prev_landmarks"
  output_stream: "gated_prev_box_rects_from_landmarks"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      # NOTE(review): presumably the value applied when the ALLOW side packet
      # is not provided -- confirm against GateCalculatorOptions.
      allow: true
    }
  }
}

# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided max_num_objects.
# The input is the gated vector of rects from the previous image; the result
# is emitted on "prev_has_enough_objects" and drives the DISALLOW input of the
# GateCalculator that produces "detection_image".
node {
  calculator: "NormalizedRectVectorHasMinSizeCalculator"
  input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks"
  input_side_packet: "max_num_objects"
  output_stream: "prev_has_enough_objects"
}

# Drops the incoming image when the rects carried over from the previous image
# already number at least max_num_objects (see "prev_has_enough_objects").
# Otherwise, passes the incoming image through as "detection_image" to trigger
# a new round of box detection in ObjectDetectionOidV4Subgraph.
node {
  calculator: "GateCalculator"
  input_stream: "image"
  input_stream: "DISALLOW:prev_has_enough_objects"
  output_stream: "detection_image"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      # NOTE(review): presumably lets the image through when the DISALLOW
      # packet is empty (e.g. for the very first image) -- confirm against
      # GateCalculatorOptions.
      empty_packets_as_allow: true
    }
  }
}

# Subgraph that performs 2D object detection.
# It only receives images that passed the gate above ("detection_image"), and
# its output is clipped to max_num_objects by the next node.
node {
  calculator: "ObjectDetectionOidV4Subgraph"
  input_stream: "IMAGE_GPU:detection_image"
  input_side_packet: "LABELS_CSV:allowed_labels"
  output_stream: "DETECTIONS:raw_detections"
}

# Caps the number of detections at the provided max_num_objects: reads
# "raw_detections" and emits the clipped vector as "detections".
node {
  calculator: "ClipDetectionVectorSizeCalculator"
  input_stream: "raw_detections"
  input_side_packet: "max_num_objects"
  output_stream: "detections"
}

# Extracts image size from the input images.
# Reads the ungated "image" stream (not "detection_image"); the resulting
# "image_size" is consumed by DetectionsToRectsCalculator.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "SIZE:image_size"
}

# Turns the clipped box detections into rectangles (normalized by image size)
# that enclose the boxes, emitted as "box_rects_from_detections".
node {
  calculator: "DetectionsToRectsCalculator"
  input_stream: "DETECTIONS:detections"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECTS:box_rects_from_detections"
  options {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      # NOTE(review): presumably suppresses a placeholder zero rect when the
      # detection vector is empty -- confirm against the calculator options.
      output_zero_rect_for_empty_detections: false
    }
  }
}

# Associates the NormalizedRect vector carried over from the previous image
# with the rects derived from object detections on the current image. This
# calculator ensures that the resulting "box_rects" vector doesn't contain
# overlapping regions, as judged by the specified min_similarity_threshold.
node {
  calculator: "AssociationNormRectCalculator"
  input_stream: "box_rects_from_detections"
  input_stream: "gated_prev_box_rects_from_landmarks"
  output_stream: "box_rects"
  options {
    [mediapipe.AssociationCalculatorOptions.ext] {
      min_similarity_threshold: 0.2
    }
  }
}

# Outputs each element of box_rects at a fake timestamp for the rest of the
# graph to process. Clones the image packet (as "landmarks_image") for each
# single_box_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
  calculator: "BeginLoopNormalizedRectCalculator"
  input_stream: "ITERABLE:box_rects"
  input_stream: "CLONE:image"
  output_stream: "ITEM:single_box_rect"
  output_stream: "CLONE:landmarks_image"
  output_stream: "BATCH_END:box_rects_timestamp"
}

# Subgraph that localizes box landmarks.
# Runs inside the loop: it receives one rect ("single_box_rect") together with
# the cloned image ("landmarks_image") and emits the landmarks for that rect.
node {
  calculator: "BoxLandmarkSubgraph"
  input_stream: "IMAGE:landmarks_image"
  input_stream: "NORM_RECT:single_box_rect"
  output_stream: "NORM_LANDMARKS:single_box_landmarks"
}

# Collects a set of landmarks for each box into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITEM:single_box_landmarks"
  input_stream: "BATCH_END:box_rects_timestamp"
  output_stream: "ITERABLE:multi_box_landmarks"
}

# Converts the collected box landmarks ("multi_box_landmarks") into a frame
# annotation ("box_annotations"), which is lifted to 3D by the next node.
node {
  calculator: "LandmarksToFrameAnnotationCalculator"
  input_stream: "MULTI_LANDMARKS:multi_box_landmarks"
  output_stream: "FRAME_ANNOTATION:box_annotations"
}

# Lifts the 2D landmarks to 3D using the EPnP algorithm. The lifted annotations
# are emitted as "detected_objects", which is this subgraph's FRAME_ANNOTATION
# output and also feeds FrameAnnotationToRectCalculator.
node {
  calculator: "Lift2DFrameAnnotationTo3DCalculator"
  input_stream: "FRAME_ANNOTATION:box_annotations"
  output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects"
  options {
    [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] {
      # NOTE(review): 1.5731 ~= 2.0975 * 3/4, which lines up with the 3:4
      # (width:height) input aspect ratio assumed by this graph; revisit both
      # values if that assumption changes.
      normalized_focal_x: 2.0975
      normalized_focal_y: 1.5731
    }
  }
}

# Gets rotated rectangles from the detected boxes.
# The output "box_rects_from_landmarks" is the LOOP (back edge) input of
# PreviousLoopbackCalculator, i.e. these rects are reused on the next image.
node {
  calculator: "FrameAnnotationToRectCalculator"
  input_stream: "FRAME_ANNOTATION:detected_objects"
  output_stream: "NORM_RECTS:box_rects_from_landmarks"
}

# Caches the box rectangles fed back from FrameAnnotationToRectCalculator
# (derived from the landmarks localized by BoxLandmarkSubgraph), and upon the
# arrival of the next input image sends out the cached rectangles with the
# timestamp replaced by that of the input image, essentially generating a
# packet that carries the previous box rectangles. Note that upon the arrival
# of the very first input image, an empty packet is sent out to jump start the
# feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:box_rects_from_landmarks"
  output_stream: "PREV_LOOP:prev_box_rects_from_landmarks"
  # The LOOP input is produced downstream of this node, so it has to be
  # declared as a back edge.
  input_stream_info {
    tag_index: "LOOP"
    back_edge: true
  }
}
add mediapipe modules to fix examples 2022-06-11 21:25:48 +02:00			`# MediaPipe Objectron on GPU that produces 3D bounding boxes for objects.`
			`type: "ObjectronGpuSubgraph"`

			`# Input/Output streams and input side packets.`
			`# Note that the input image is assumed to have aspect ratio 3:4 (width:height).`
			`input_stream: "IMAGE_GPU:image"`
			`# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera`
			`input_side_packet: "LABELS_CSV:allowed_labels"`
			`# Max number of objects to detect/track. (int)`
			`input_side_packet: "MAX_NUM_OBJECTS:max_num_objects"`
			`# Whether landmarks on the previous image should be used to help localize`
			`# landmarks on the current image. (bool)`
			`input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"`

			`# Collection of detected 3D objects, represented as a FrameAnnotation.`
			`output_stream: "FRAME_ANNOTATION:detected_objects"`

			`# When the optional input side packet "use_prev_landmarks" is either absent or`
			`# set to true, uses the landmarks on the previous image to help localize`
			`# landmarks on the current image.`
			`node {`
			`calculator: "GateCalculator"`
			`input_side_packet: "ALLOW:use_prev_landmarks"`
			`input_stream: "prev_box_rects_from_landmarks"`
			`output_stream: "gated_prev_box_rects_from_landmarks"`
			`options: {`
			`[mediapipe.GateCalculatorOptions.ext] {`
			`allow: true`
			`}`
			`}`
			`}`

			`# Determines if an input vector of NormalizedRect has a size greater than or`
			`# equal to the provided max_num_objects.`
			`node {`
			`calculator: "NormalizedRectVectorHasMinSizeCalculator"`
			`input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks"`
			`input_side_packet: "max_num_objects"`
			`output_stream: "prev_has_enough_objects"`
			`}`

			`# Drops the incoming image if BoxLandmarkSubgraph was able to identify box`
			`# presence in the previous image. Otherwise, passes the incoming image through`
			`# to trigger a new round of box detection in ObjectDetectionOidV4Subgraph.`
			`node {`
			`calculator: "GateCalculator"`
			`input_stream: "image"`
			`input_stream: "DISALLOW:prev_has_enough_objects"`
			`output_stream: "detection_image"`

			`options: {`
			`[mediapipe.GateCalculatorOptions.ext] {`
			`empty_packets_as_allow: true`
			`}`
			`}`
			`}`

			`# Subgraph that performs 2D object detection.`
			`node {`
			`calculator: "ObjectDetectionOidV4Subgraph"`
			`input_stream: "IMAGE_GPU:detection_image"`
			`input_side_packet: "LABELS_CSV:allowed_labels"`
			`output_stream: "DETECTIONS:raw_detections"`
			`}`

			`# Makes sure there are no more detections than provided max_num_objects.`
			`node {`
			`calculator: "ClipDetectionVectorSizeCalculator"`
			`input_stream: "raw_detections"`
			`output_stream: "detections"`
			`input_side_packet: "max_num_objects"`

			`}`

			`# Extracts image size from the input images.`
			`node {`
			`calculator: "ImagePropertiesCalculator"`
			`input_stream: "IMAGE_GPU:image"`
			`output_stream: "SIZE:image_size"`
			`}`

			`# Converts results of box detection into rectangles (normalized by image size)`
			`# that enclose the box.`
			`node {`
			`calculator: "DetectionsToRectsCalculator"`
			`input_stream: "DETECTIONS:detections"`
			`input_stream: "IMAGE_SIZE:image_size"`
			`output_stream: "NORM_RECTS:box_rects_from_detections"`
			`options: {`
			`[mediapipe.DetectionsToRectsCalculatorOptions.ext] {`
			`output_zero_rect_for_empty_detections: false`
			`}`
			`}`
			`}`

			`# Performs association between NormalizedRect vector elements from previous`
			`# image and rects based on object detections from the current image. This`
			`# calculator ensures that the output box_rects vector doesn't contain`
			`# overlapping regions based on the specified min_similarity_threshold.`
			`node {`
			`calculator: "AssociationNormRectCalculator"`
			`input_stream: "box_rects_from_detections"`
			`input_stream: "gated_prev_box_rects_from_landmarks"`
			`output_stream: "box_rects"`
			`options: {`
			`[mediapipe.AssociationCalculatorOptions.ext] {`
			`min_similarity_threshold: 0.2`
			`}`
			`}`
			`}`

			`# Outputs each element of box_rects at a fake timestamp for the rest of the`
			`# graph to process. Clones the image packet for each single_box_rect at the`
			`# fake timestamp. At the end of the loop, outputs the BATCH_END timestamp for`
			`# downstream calculators to inform them that all elements in the vector have`
			`# been processed.`
			`node {`
			`calculator: "BeginLoopNormalizedRectCalculator"`
			`input_stream: "ITERABLE:box_rects"`
			`input_stream: "CLONE:image"`
			`output_stream: "ITEM:single_box_rect"`
			`output_stream: "CLONE:landmarks_image"`
			`output_stream: "BATCH_END:box_rects_timestamp"`
			`}`

			`# Subgraph that localizes box landmarks.`
			`node {`
			`calculator: "BoxLandmarkSubgraph"`
			`input_stream: "IMAGE:landmarks_image"`
			`input_stream: "NORM_RECT:single_box_rect"`
			`output_stream: "NORM_LANDMARKS:single_box_landmarks"`
			`}`

			`# Collects a set of landmarks for each box into a vector. Upon receiving the`
			`# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END`
			`# timestamp.`
			`node {`
			`calculator: "EndLoopNormalizedLandmarkListVectorCalculator"`
			`input_stream: "ITEM:single_box_landmarks"`
			`input_stream: "BATCH_END:box_rects_timestamp"`
			`output_stream: "ITERABLE:multi_box_landmarks"`
			`}`

			`# Convert box landmarks to frame annotations.`
			`node {`
			`calculator: "LandmarksToFrameAnnotationCalculator"`
			`input_stream: "MULTI_LANDMARKS:multi_box_landmarks"`
			`output_stream: "FRAME_ANNOTATION:box_annotations"`
			`}`

			`# Lift the 2D landmarks to 3D using EPnP algorithm.`
			`node {`
			`calculator: "Lift2DFrameAnnotationTo3DCalculator"`
			`input_stream: "FRAME_ANNOTATION:box_annotations"`
			`output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects"`
			`options: {`
			`[mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] {`
			`normalized_focal_x: 2.0975`
			`normalized_focal_y: 1.5731`
			`}`
			`}`
			`}`

			`# Get rotated rectangle from detected box.`
			`node {`
			`calculator: "FrameAnnotationToRectCalculator"`
			`input_stream: "FRAME_ANNOTATION:detected_objects"`
			`output_stream: "NORM_RECTS:box_rects_from_landmarks"`
			`}`

			`# Caches a box rectangle fed back from BoxLandmarkSubgraph, and upon the`
			`# arrival of the next input image sends out the cached rectangle with the`
			`# timestamp replaced by that of the input image, essentially generating a packet`
			`# that carries the previous box rectangle. Note that upon the arrival of the`
			`# very first input image, an empty packet is sent out to jump start the`
			`# feedback loop.`
			`node {`
			`calculator: "PreviousLoopbackCalculator"`
			`input_stream: "MAIN:image"`
			`input_stream: "LOOP:box_rects_from_landmarks"`
			`input_stream_info: {`
			`tag_index: "LOOP"`
			`back_edge: true`
			`}`
			`output_stream: "PREV_LOOP:prev_box_rects_from_landmarks"`
			`}`