# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/object_detection:object_detection_tflite.
#
# Data flow (stream names as used below):
#   input_video -> transformed_input_video -> image_tensor -> detection_tensors
#   -> detections -> filtered_detections -> output_detections -> render_data
#   -> output_video
# Side packets: input_video_path (in), output_video_path (in), anchors
# (produced and consumed inside the graph).

# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1

# Decodes an input video file into images and a video header.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:input_video"
  output_stream: "VIDEO_PRESTREAM:input_video_header"
}

# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "IMAGE:transformed_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 320
      output_height: 320
    }
  }
}

# Converts the transformed input image on CPU into an image tensor as a
# TfLiteTensor. The zero_center option is set to true to normalize the
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE:transformed_input_video"
  output_stream: "TENSORS:image_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
      zero_center: true
    }
  }
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS:image_tensor"
  output_stream: "TENSORS:detection_tensors"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/ssdlite_object_detection.tflite"
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options. The input size here (320x320) mirrors the
# output size of the ImageTransformationCalculator above; keep the two in sync.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  node_options: {
    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
      num_layers: 6
      min_scale: 0.2
      max_scale: 0.95
      input_size_height: 320
      input_size_width: 320
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      # One stride per layer (num_layers: 6).
      strides: 16
      strides: 32
      strides: 64
      strides: 128
      strides: 256
      strides: 512
      aspect_ratios: 1.0
      aspect_ratios: 2.0
      aspect_ratios: 0.5
      aspect_ratios: 3.0
      aspect_ratios: 0.3333
      reduce_boxes_in_lowest_layer: true
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TfLiteTensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
      num_classes: 91
      # NOTE(review): num_boxes presumably has to equal the number of anchors
      # generated by the SsdAnchorsCalculator options above -- re-check this
      # value whenever the anchor specification changes.
      num_boxes: 2034
      num_coords: 4
      ignore_classes: 0
      apply_exponential_on_box_size: true
      x_scale: 10.0
      y_scale: 10.0
      h_scale: 5.0
      w_scale: 5.0
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "detections"
  output_stream: "filtered_detections"
  node_options: {
    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
      min_suppression_threshold: 0.4
      min_score_threshold: 0.6
      max_num_detections: 5
      overlap_type: INTERSECTION_OVER_UNION
    }
  }
}

# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
  calculator: "DetectionLabelIdToTextCalculator"
  input_stream: "filtered_detections"
  output_stream: "output_detections"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
      label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
    }
  }
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:output_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images. Note that the
# overlay is drawn on the original decoded frames (input_video), not on the
# 320x320 transformed frames fed to the model.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}

# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:output_video"
  input_stream: "VIDEO_PRESTREAM:input_video_header"
  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
  node_options: {
    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions] {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}