# MediaPipe Objectron on CPU that produces 3D bounding boxes for objects.

type: "ObjectronCpuSubgraph"

# Input/Output streams and input side packets.
input_stream: "IMAGE:image"
# Path to the TfLite model for 3D bounding box landmark prediction.
input_side_packet: "MODEL_PATH:box_landmark_model_path"
# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera.
input_side_packet: "LABELS_CSV:allowed_labels"
# Max number of objects to detect/track. (int)
input_side_packet: "MAX_NUM_OBJECTS:max_num_objects"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Bounding box landmarks topology definition.
# The numbers are indices in the box_landmarks list.
#
#       3 + + + + + + + + 7
#       +\                +\          UP
#       + \               + \
#       +  \              +  \        |
#       +   4 + + + + + + + + 8       | y
#       +   +             +   +       |
#       +   +             +   +       |
#       +   +     (0)     +   +       .------- x
#       +   +             +   +        \
#       1 + + + + + + + + 5   +         \
#        \  +              \  +          \ z
#         \ +               \ +           \
#          \+                \+
#           2 + + + + + + + + 6

# Collection of detected 3D objects, represented as a FrameAnnotation.
output_stream: "FRAME_ANNOTATION:detected_objects"
# Collection of box landmarks, one NormalizedLandmarkList per box.
output_stream: "MULTI_LANDMARKS:multi_box_landmarks"
# Crop rectangles used for landmark localization. They come from associating
# the current image's detections with the rectangles derived from the previous
# image's box landmarks.
output_stream: "NORM_RECTS:multi_box_rects"

# Loads the file in the specified path into a blob.
node {
  calculator: "LocalFileContentsCalculator"
  input_side_packet: "FILE_PATH:0:box_landmark_model_path"
  output_side_packet: "CONTENTS:0:box_landmark_model_blob"
}

# Converts the input blob into a TF Lite model.
node {
  calculator: "TfLiteModelCalculator"
  input_side_packet: "MODEL_BLOB:box_landmark_model_blob"
  output_side_packet: "MODEL:box_landmark_model"
}

# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  input_stream: "prev_box_rects_from_landmarks"
  output_stream: "gated_prev_box_rects_from_landmarks"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      # Keeps the gate open when the ALLOW side packet is not provided.
      allow: true
    }
  }
}

# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided max_num_objects.
node {
  calculator: "NormalizedRectVectorHasMinSizeCalculator"
  input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks"
  input_side_packet: "max_num_objects"
  output_stream: "prev_has_enough_objects"
}

# Drops the incoming image if BoxLandmarkSubgraph was able to identify box
# presence in the previous image. Otherwise, passes the incoming image through
# to trigger a new round of box detection in ObjectDetectionOidV4Subgraph.
node {
  calculator: "GateCalculator"
  input_stream: "image"
  input_stream: "DISALLOW:prev_has_enough_objects"
  output_stream: "detection_image"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      # An empty DISALLOW packet (e.g. no previous rects yet) lets the image
      # through so that detection runs.
      empty_packets_as_allow: true
    }
  }
}

# Subgraph that performs 2D object detection.
node {
  calculator: "ObjectDetectionOidV4Subgraph"
  input_stream: "IMAGE:detection_image"
  input_side_packet: "LABELS_CSV:allowed_labels"
  output_stream: "DETECTIONS:raw_detections"
}

# Makes sure there are no more detections than the provided max_num_objects.
node {
  calculator: "ClipDetectionVectorSizeCalculator"
  input_stream: "raw_detections"
  output_stream: "detections"
  input_side_packet: "max_num_objects"
}

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "SIZE:image_size"
}

# Converts results of box detection into rectangles (normalized by image size)
# that enclose the boxes.
node {
  calculator: "DetectionsToRectsCalculator"
  input_stream: "DETECTIONS:detections"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECTS:box_rects_from_detections"
  options: {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      output_zero_rect_for_empty_detections: false
    }
  }
}

# Performs association between NormalizedRect vector elements from the previous
# image and rects based on object detections from the current image. This
# calculator ensures that the output multi_box_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
  calculator: "AssociationNormRectCalculator"
  input_stream: "box_rects_from_detections"
  input_stream: "gated_prev_box_rects_from_landmarks"
  output_stream: "multi_box_rects"
  options: {
    [mediapipe.AssociationCalculatorOptions.ext] {
      min_similarity_threshold: 0.2
    }
  }
}

# Outputs each element of multi_box_rects at a fake timestamp for the rest of
# the graph to process. Clones the image packet for each single_box_rect at the
# fake timestamp. At the end of the loop, outputs the BATCH_END timestamp for
# downstream calculators to inform them that all elements in the vector have
# been processed.
node {
  calculator: "BeginLoopNormalizedRectCalculator"
  input_stream: "ITERABLE:multi_box_rects"
  input_stream: "CLONE:image"
  output_stream: "ITEM:single_box_rect"
  output_stream: "CLONE:landmarks_image"
  output_stream: "BATCH_END:box_rects_timestamp"
}

# Subgraph that localizes box landmarks.
node {
  calculator: "BoxLandmarkSubgraph"
  input_stream: "IMAGE:landmarks_image"
  input_side_packet: "MODEL:box_landmark_model"
  input_stream: "NORM_RECT:single_box_rect"
  output_stream: "NORM_LANDMARKS:single_box_landmarks"
}

# Collects a set of landmarks for each box into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITEM:single_box_landmarks"
  input_stream: "BATCH_END:box_rects_timestamp"
  output_stream: "ITERABLE:multi_box_landmarks"
}

# Converts box landmarks to frame annotations.
node {
  calculator: "LandmarksToFrameAnnotationCalculator"
  input_stream: "MULTI_LANDMARKS:multi_box_landmarks"
  output_stream: "FRAME_ANNOTATION:box_annotations"
}

# Lifts the 2D landmarks to 3D using the EPnP algorithm.
node {
  name: "Lift2DFrameAnnotationTo3DCalculator"
  calculator: "Lift2DFrameAnnotationTo3DCalculator"
  input_stream: "FRAME_ANNOTATION:box_annotations"
  output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects"
  options: {
    [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] {
      normalized_focal_x: 1.0
      normalized_focal_y: 1.0
    }
  }
}

# Gets rotated rectangles from the detected boxes.
node {
  calculator: "FrameAnnotationToRectCalculator"
  input_stream: "FRAME_ANNOTATION:detected_objects"
  output_stream: "NORM_RECTS:box_rects_from_landmarks"
}

# Caches the box rectangles fed back from FrameAnnotationToRectCalculator
# (box_rects_from_landmarks), and upon the arrival of the next input image
# sends out the cached rectangles with the timestamp replaced by that of the
# input image, essentially generating a packet that carries the previous box
# rectangles. Note that upon the arrival of the very first input image, an
# empty packet is sent out to jump start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:box_rects_from_landmarks"
  input_stream_info: {
    tag_index: "LOOP"
    # Marks LOOP as a back edge so the graph's cycle is accepted.
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_box_rects_from_landmarks"
}