mediapipe-rs/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt

# MediaPipe graph to detect poses. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "pose_detection.tflite" is available at
# "mediapipe/modules/pose_detection/pose_detection.tflite"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "PoseDetectionGpu"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:pose_detections"
#   }

type: "PoseDetectionGpu"

# GPU image. (GpuBuffer)
# Fed directly into the ImageToTensorCalculator node below as "IMAGE_GPU".
input_stream: "IMAGE:image"

# Detected poses. (std::vector<Detection>)
# The bounding box in each pose detection is currently set to the bounding box
# of the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the last two are for the upper
# body only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for a
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Transforms the input image into a 224x224 tensor while keeping the aspect
# ratio (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image. The applied padding is emitted on the
# "letterbox_padding" stream so that DetectionLetterboxRemovalCalculator (last
# node of this graph) can map detections back onto the original image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      # Must match input_size_width/input_size_height of SsdAnchorsCalculator
      # and the x/y/w/h scales of TensorsToDetectionsCalculator below.
      output_tensor_width: 224
      output_tensor_height: 224
      # Preserve the aspect ratio; this is what introduces the letterboxing.
      keep_aspect_ratio: true
      # Output tensor values are floats in the range [-1.0, 1.0].
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      # NOTE(review): presumably fills the letterbox area with zeros - confirm
      # against ImageToTensorCalculatorOptions.
      border_mode: BORDER_ZERO
      # NOTE(review): presumably declares that the GPU buffer's first row is
      # the top of the image - confirm against ImageToTensorCalculatorOptions.
      gpu_origin: TOP_LEFT
    }
  }
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"
      # Run inference through the GPU delegate, using the advanced GPU API.
      delegate: { gpu { use_advanced_gpu_api: true } }
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options. The "anchors" side packet is consumed by
# TensorsToDetectionsCalculator below.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      # Same 224x224 size as the tensor produced by ImageToTensorCalculator.
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      # One stride entry per layer (num_layers: 5); order matters.
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      # NOTE(review): presumably must equal the number of anchors produced by
      # SsdAnchorsCalculator above (28*28*2 + 14*14*2 + 7*7*6 = 2254 if two
      # anchors are generated per cell per layer) - confirm before changing
      # either node.
      num_boxes: 2254
      # 12 values per box: 4 box coordinates (offset 0) followed by
      # 4 keypoints x 2 values each (offset 4).
      num_coords: 12
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 4
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      # Scales equal the 224-pixel side of the model input tensor.
      x_scale: 224.0
      y_scale: 224.0
      h_scale: 224.0
      w_scale: 224.0
      # NOTE(review): presumably detections scoring below this threshold are
      # discarded - confirm against TensorsToDetectionsCalculatorOptions.
      min_score_thresh: 0.5
    }
  }
}

# Performs non-max suppression to remove excessive detections. Takes the
# "unfiltered_detections" stream produced by TensorsToDetectionsCalculator and
# emits "filtered_detections" for the letterbox-removal node below. Both
# streams are declared without tags here.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      # Overlap is measured as intersection-over-union, with 0.3 as the
      # suppression threshold.
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      # NOTE(review): presumably merges overlapping detections by score-weighted
      # averaging instead of dropping them - confirm against
      # NonMaxSuppressionCalculatorOptions.
      algorithm: WEIGHTED
    }
  }
}

# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (produced by ImageToTensorCalculator with
# keep_aspect_ratio: true) to the corresponding locations on the same image
# with the letterbox removed (the input image to the graph before image
# transformation). The resulting "detections" stream is the graph's
# DETECTIONS output.
node {
  calculator: "DetectionLetterboxRemovalCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "DETECTIONS:detections"
}
add mediapipe modules to fix examples 2022-06-11 21:25:48 +02:00			`# MediaPipe graph to detect poses. (GPU input, and inference is executed on`
			`# GPU.)`
			`#`
			`# It is required that "pose_detection.tflite" is available at`
			`# "mediapipe/modules/pose_detection/pose_detection.tflite"`
			`# path during execution.`
			`#`
			`# EXAMPLE:`
			`# node {`
			`# calculator: "PoseDetectionGpu"`
			`# input_stream: "IMAGE:image"`
			`# output_stream: "DETECTIONS:pose_detections"`
			`# }`

			`type: "PoseDetectionGpu"`

			`# GPU image. (GpuBuffer)`
			`input_stream: "IMAGE:image"`

			`# Detected poses. (std::vector<Detection>)`
			`# Bounding box in each pose detection is currently set to the bounding box of`
			`# the detected face. However, 4 additional key points are available in each`
			`# detection, which are used to further calculate a (rotated) bounding box that`
			`# encloses the body region of interest. Among the 4 key points, the first two`
			`# are for identifying the full-body region, and the second two for upper body`
			`# only:`
			`#`
			`# Key point 0 - mid hip center`
			`# Key point 1 - point that encodes size & rotation (for full body)`
			`# Key point 2 - mid shoulder center`
			`# Key point 3 - point that encodes size & rotation (for upper body)`
			`#`
			`# NOTE: there will not be an output packet in the DETECTIONS stream for this`
			`# particular timestamp if no poses are detected. However, the MediaPipe`
			`# framework will internally inform the downstream calculators of the absence of`
			`# this packet so that they don't wait for it unnecessarily.`
			`output_stream: "DETECTIONS:detections"`

			`# Transforms the input image into a 224x224 one while keeping the aspect ratio`
			`# (what is expected by the corresponding model), resulting in potential`
			`# letterboxing in the transformed image.`
			`node: {`
			`calculator: "ImageToTensorCalculator"`
			`input_stream: "IMAGE_GPU:image"`
			`output_stream: "TENSORS:input_tensors"`
			`output_stream: "LETTERBOX_PADDING:letterbox_padding"`
			`options: {`
			`[mediapipe.ImageToTensorCalculatorOptions.ext] {`
			`output_tensor_width: 224`
			`output_tensor_height: 224`
			`keep_aspect_ratio: true`
			`output_tensor_float_range {`
			`min: -1.0`
			`max: 1.0`
			`}`
			`border_mode: BORDER_ZERO`
			`gpu_origin: TOP_LEFT`
			`}`
			`}`
			`}`

			`# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a`
			`# vector of tensors representing, for instance, detection boxes/keypoints and`
			`# scores.`
			`node {`
			`calculator: "InferenceCalculator"`
			`input_stream: "TENSORS:input_tensors"`
			`output_stream: "TENSORS:detection_tensors"`
			`options: {`
			`[mediapipe.InferenceCalculatorOptions.ext] {`
			`model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"`
			`#`
			`delegate: { gpu { use_advanced_gpu_api: true } }`
			`}`
			`}`
			`}`

			`# Generates a single side packet containing a vector of SSD anchors based on`
			`# the specification in the options.`
			`node {`
			`calculator: "SsdAnchorsCalculator"`
			`output_side_packet: "anchors"`
			`options: {`
			`[mediapipe.SsdAnchorsCalculatorOptions.ext] {`
			`num_layers: 5`
			`min_scale: 0.1484375`
			`max_scale: 0.75`
			`input_size_height: 224`
			`input_size_width: 224`
			`anchor_offset_x: 0.5`
			`anchor_offset_y: 0.5`
			`strides: 8`
			`strides: 16`
			`strides: 32`
			`strides: 32`
			`strides: 32`
			`aspect_ratios: 1.0`
			`fixed_anchor_size: true`
			`}`
			`}`
			`}`

			`# Decodes the detection tensors generated by the TensorFlow Lite model, based on`
			`# the SSD anchors and the specification in the options, into a vector of`
			`# detections. Each detection describes a detected object.`
			`node {`
			`calculator: "TensorsToDetectionsCalculator"`
			`input_stream: "TENSORS:detection_tensors"`
			`input_side_packet: "ANCHORS:anchors"`
			`output_stream: "DETECTIONS:unfiltered_detections"`
			`options: {`
			`[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {`
			`num_classes: 1`
			`num_boxes: 2254`
			`num_coords: 12`
			`box_coord_offset: 0`
			`keypoint_coord_offset: 4`
			`num_keypoints: 4`
			`num_values_per_keypoint: 2`
			`sigmoid_score: true`
			`score_clipping_thresh: 100.0`
			`reverse_output_order: true`
			`x_scale: 224.0`
			`y_scale: 224.0`
			`h_scale: 224.0`
			`w_scale: 224.0`
			`min_score_thresh: 0.5`
			`}`
			`}`
			`}`

			`# Performs non-max suppression to remove excessive detections.`
			`node {`
			`calculator: "NonMaxSuppressionCalculator"`
			`input_stream: "unfiltered_detections"`
			`output_stream: "filtered_detections"`
			`options: {`
			`[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {`
			`min_suppression_threshold: 0.3`
			`overlap_type: INTERSECTION_OVER_UNION`
			`algorithm: WEIGHTED`
			`}`
			`}`
			`}`

			`# Adjusts detection locations (already normalized to [0.f, 1.f]) on the`
			`# letterboxed image (after image transformation with the FIT scale mode) to the`
			`# corresponding locations on the same image with the letterbox removed (the`
			`# input image to the graph before image transformation).`
			`node {`
			`calculator: "DetectionLetterboxRemovalCalculator"`
			`input_stream: "DETECTIONS:filtered_detections"`
			`input_stream: "LETTERBOX_PADDING:letterbox_padding"`
			`output_stream: "DETECTIONS:detections"`
			`}`