mediapipe-rs/mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt

# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
# mediapipe/examples/desktop/object_detection:object_detection_tflite.

# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1

# Decodes an input video file into images and a video header.
# The file to read is supplied through the "input_video_path" side packet.
# Decoded frames are emitted on "input_video"; the header is emitted on
# "input_video_header", which the video encoder node at the end of this graph
# takes as its VIDEO_PRESTREAM input.
node {
  calculator: "OpenCvVideoDecoderCalculator"
  input_side_packet: "INPUT_FILE_PATH:input_video_path"
  output_stream: "VIDEO:input_video"
  output_stream: "VIDEO_PRESTREAM:input_video_header"
}

# Transforms the input image on CPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
# The 320x320 output size is the same as the input_size_width/input_size_height
# configured for the SsdAnchorsCalculator node in this graph; keep them in sync.
node {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "IMAGE:transformed_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 320
      output_height: 320
    }
  }
}

# Converts the transformed input image on CPU into an image tensor as a
# TfLiteTensor. The zero_center option is set to true to normalize the
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
# Reads "transformed_input_video" and emits "image_tensor", which is the input
# of the inference node.
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE:transformed_input_video"
  output_stream: "TENSORS:image_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
      zero_center: true
    }
  }
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
# Reads "image_tensor" and emits "detection_tensors".
# NOTE(review): model_path is a relative path; presumably it is resolved
# against the process working directory / resource root — confirm for the
# environment this graph is run in.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS:image_tensor"
  output_stream: "TENSORS:detection_tensors"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/ssdlite_object_detection.tflite"
    }
  }
}

# Produces one side packet, "anchors", holding the vector of SSD anchors
# described by the options below. The side packet feeds the ANCHORS input of
# the detection-decoding node.
# NOTE(review): input_size_* equals the 320x320 output of the image
# transformation node, and the number of anchors generated here presumably has
# to equal num_boxes (2034) in the decoding node — confirm before changing the
# strides, aspect ratios or input size.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  node_options: {
    [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
      num_layers: 6
      min_scale: 0.2
      max_scale: 0.95
      input_size_height: 320
      input_size_width: 320
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      # Six strides, the same count as num_layers.
      strides: [16, 32, 64, 128, 256, 512]
      aspect_ratios: [1.0, 2.0, 0.5, 3.0, 0.3333]
      reduce_boxes_in_lowest_layer: true
    }
  }
}

# Turns the raw model output ("detection_tensors") into a vector of detections,
# one entry per detected object, using the SSD anchors side packet together
# with the decoding parameters given in the options.
node {
  calculator: "TfLiteTensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
      num_classes: 91
      num_boxes: 2034
      num_coords: 4
      ignore_classes: 0
      apply_exponential_on_box_size: true
      # Scale factors for the box centre (x, y) and box size (w, h);
      # presumably they must match the values the model was trained with.
      x_scale: 10.0
      y_scale: 10.0
      w_scale: 5.0
      h_scale: 5.0
    }
  }
}

# Applies non-max suppression to "detections" to drop excessive detections;
# the remaining ones are emitted on "filtered_detections".
# Overlap between detections is measured as intersection-over-union.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "detections"
  output_stream: "filtered_detections"
  node_options: {
    [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
      overlap_type: INTERSECTION_OVER_UNION
      min_suppression_threshold: 0.4
      min_score_threshold: 0.6
      max_num_detections: 5
    }
  }
}

# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
# Reads "filtered_detections" (the non-max suppression output) and emits
# "output_detections".
node {
  calculator: "DetectionLabelIdToTextCalculator"
  input_stream: "filtered_detections"
  output_stream: "output_detections"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
      label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
    }
  }
}

# Turns the labelled detections ("output_detections") into drawing primitives
# ("render_data") that the annotation overlay node consumes. The options set
# a thickness of 4.0 and the colour red (r=255, g=0, b=0).
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:output_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color {
        r: 255
        g: 0
        b: 0
      }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
# The canvas is the original decoded "input_video" frame, not the 320x320
# "transformed_input_video". "render_data" is connected as an untagged input
# stream, so do not add a tag or reorder it without checking the calculator's
# contract.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}

# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
# The annotated frames arrive on "output_video" and the header on
# "input_video_header" (produced by the video decoder node). The destination
# file is supplied through the "output_video_path" side packet. Output is
# written as mp4 using the "avc1" codec.
node {
  calculator: "OpenCvVideoEncoderCalculator"
  input_stream: "VIDEO:output_video"
  input_stream: "VIDEO_PRESTREAM:input_video_header"
  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
  node_options: {
    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions] {
      codec: "avc1"
      video_format: "mp4"
    }
  }
}
code fill 2022-03-01 13:04:01 +01:00			`# MediaPipe graph that performs object detection on desktop with TensorFlow Lite`
			`# on CPU.`
			`# Used in the example in`
			`# mediapipe/examples/desktop/object_detection:object_detection_tflite.`

			`# max_queue_size limits the number of packets enqueued on any input stream`
			`# by throttling inputs to the graph. This makes the graph only process one`
			`# frame per time.`
			`max_queue_size: 1`

			`# Decodes an input video file into images and a video header.`
			`node {`
			`calculator: "OpenCvVideoDecoderCalculator"`
			`input_side_packet: "INPUT_FILE_PATH:input_video_path"`
			`output_stream: "VIDEO:input_video"`
			`output_stream: "VIDEO_PRESTREAM:input_video_header"`
			`}`

			`# Transforms the input image on CPU to a 320x320 image. To scale the image, by`
			`# default it uses the STRETCH scale mode that maps the entire input image to the`
			`# entire transformed image. As a result, image aspect ratio may be changed and`
			`# objects in the image may be deformed (stretched or squeezed), but the object`
			`# detection model used in this graph is agnostic to that deformation.`
			`node: {`
			`calculator: "ImageTransformationCalculator"`
			`input_stream: "IMAGE:input_video"`
			`output_stream: "IMAGE:transformed_input_video"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {`
			`output_width: 320`
			`output_height: 320`
			`}`
			`}`
			`}`

			`# Converts the transformed input image on CPU into an image tensor as a`
			`# TfLiteTensor. The zero_center option is set to true to normalize the`
			`# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].`
			`node {`
			`calculator: "TfLiteConverterCalculator"`
			`input_stream: "IMAGE:transformed_input_video"`
			`output_stream: "TENSORS:image_tensor"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {`
			`zero_center: true`
			`}`
			`}`
			`}`

			`# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a`
			`# vector of tensors representing, for instance, detection boxes/keypoints and`
			`# scores.`
			`node {`
			`calculator: "TfLiteInferenceCalculator"`
			`input_stream: "TENSORS:image_tensor"`
			`output_stream: "TENSORS:detection_tensors"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {`
			`model_path: "mediapipe/models/ssdlite_object_detection.tflite"`
			`}`
			`}`
			`}`

			`# Generates a single side packet containing a vector of SSD anchors based on`
			`# the specification in the options.`
			`node {`
			`calculator: "SsdAnchorsCalculator"`
			`output_side_packet: "anchors"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {`
			`num_layers: 6`
			`min_scale: 0.2`
			`max_scale: 0.95`
			`input_size_height: 320`
			`input_size_width: 320`
			`anchor_offset_x: 0.5`
			`anchor_offset_y: 0.5`
			`strides: 16`
			`strides: 32`
			`strides: 64`
			`strides: 128`
			`strides: 256`
			`strides: 512`
			`aspect_ratios: 1.0`
			`aspect_ratios: 2.0`
			`aspect_ratios: 0.5`
			`aspect_ratios: 3.0`
			`aspect_ratios: 0.3333`
			`reduce_boxes_in_lowest_layer: true`
			`}`
			`}`
			`}`

			`# Decodes the detection tensors generated by the TensorFlow Lite model, based on`
			`# the SSD anchors and the specification in the options, into a vector of`
			`# detections. Each detection describes a detected object.`
			`node {`
			`calculator: "TfLiteTensorsToDetectionsCalculator"`
			`input_stream: "TENSORS:detection_tensors"`
			`input_side_packet: "ANCHORS:anchors"`
			`output_stream: "DETECTIONS:detections"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {`
			`num_classes: 91`
			`num_boxes: 2034`
			`num_coords: 4`
			`ignore_classes: 0`
			`apply_exponential_on_box_size: true`

			`x_scale: 10.0`
			`y_scale: 10.0`
			`h_scale: 5.0`
			`w_scale: 5.0`
			`}`
			`}`
			`}`

			`# Performs non-max suppression to remove excessive detections.`
			`node {`
			`calculator: "NonMaxSuppressionCalculator"`
			`input_stream: "detections"`
			`output_stream: "filtered_detections"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {`
			`min_suppression_threshold: 0.4`
			`min_score_threshold: 0.6`
			`max_num_detections: 5`
			`overlap_type: INTERSECTION_OVER_UNION`
			`}`
			`}`
			`}`

			`# Maps detection label IDs to the corresponding label text. The label map is`
			`# provided in the label_map_path option.`
			`node {`
			`calculator: "DetectionLabelIdToTextCalculator"`
			`input_stream: "filtered_detections"`
			`output_stream: "output_detections"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {`
			`label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"`
			`}`
			`}`
			`}`

			`# Converts the detections to drawing primitives for annotation overlay.`
			`node {`
			`calculator: "DetectionsToRenderDataCalculator"`
			`input_stream: "DETECTIONS:output_detections"`
			`output_stream: "RENDER_DATA:render_data"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {`
			`thickness: 4.0`
			`color { r: 255 g: 0 b: 0 }`
			`}`
			`}`
			`}`

			`# Draws annotations and overlays them on top of the input images.`
			`node {`
			`calculator: "AnnotationOverlayCalculator"`
			`input_stream: "IMAGE:input_video"`
			`input_stream: "render_data"`
			`output_stream: "IMAGE:output_video"`
			`}`

			`# Encodes the annotated images into a video file, adopting properties specified`
			`# in the input video header, e.g., video framerate.`
			`node {`
			`calculator: "OpenCvVideoEncoderCalculator"`
			`input_stream: "VIDEO:output_video"`
			`input_stream: "VIDEO_PRESTREAM:input_video_header"`
			`input_side_packet: "OUTPUT_FILE_PATH:output_video_path"`
			`node_options: {`
			`[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {`
			`codec: "avc1"`
			`video_format: "mp4"`
			`}`
			`}`
			`}`